summaryrefslogtreecommitdiffstats
path: root/lib/librte_eal
diff options
context:
space:
mode:
Diffstat (limited to 'lib/librte_eal')
-rw-r--r--lib/librte_eal/Makefile38
-rw-r--r--lib/librte_eal/bsdapp/Makefile38
-rw-r--r--lib/librte_eal/bsdapp/contigmem/BSDmakefile36
-rw-r--r--lib/librte_eal/bsdapp/contigmem/Makefile52
-rw-r--r--lib/librte_eal/bsdapp/contigmem/contigmem.c232
-rw-r--r--lib/librte_eal/bsdapp/eal/Makefile117
-rw-r--r--lib/librte_eal/bsdapp/eal/eal.c638
-rw-r--r--lib/librte_eal/bsdapp/eal/eal_alarm.c60
-rw-r--r--lib/librte_eal/bsdapp/eal/eal_debug.c119
-rw-r--r--lib/librte_eal/bsdapp/eal/eal_hugepage_info.c134
-rw-r--r--lib/librte_eal/bsdapp/eal/eal_interrupts.c119
-rw-r--r--lib/librte_eal/bsdapp/eal/eal_lcore.c79
-rw-r--r--lib/librte_eal/bsdapp/eal/eal_log.c57
-rw-r--r--lib/librte_eal/bsdapp/eal/eal_memory.c194
-rw-r--r--lib/librte_eal/bsdapp/eal/eal_pci.c633
-rw-r--r--lib/librte_eal/bsdapp/eal/eal_thread.c201
-rw-r--r--lib/librte_eal/bsdapp/eal/eal_timer.c94
-rw-r--r--lib/librte_eal/bsdapp/eal/include/exec-env/rte_dom0_common.h107
-rw-r--r--lib/librte_eal/bsdapp/eal/include/exec-env/rte_interrupts.h137
-rw-r--r--lib/librte_eal/bsdapp/eal/rte_eal_version.map153
-rw-r--r--lib/librte_eal/bsdapp/nic_uio/BSDmakefile36
-rw-r--r--lib/librte_eal/bsdapp/nic_uio/Makefile52
-rw-r--r--lib/librte_eal/bsdapp/nic_uio/nic_uio.c335
-rw-r--r--lib/librte_eal/common/Makefile61
-rw-r--r--lib/librte_eal/common/arch/arm/rte_cpuflags.c179
-rw-r--r--lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c147
-rw-r--r--lib/librte_eal/common/arch/tile/rte_cpuflags.c47
-rw-r--r--lib/librte_eal/common/arch/x86/rte_cpuflags.c220
-rw-r--r--lib/librte_eal/common/eal_common_cpuflags.c76
-rw-r--r--lib/librte_eal/common/eal_common_dev.c152
-rw-r--r--lib/librte_eal/common/eal_common_devargs.c176
-rw-r--r--lib/librte_eal/common/eal_common_errno.c72
-rw-r--r--lib/librte_eal/common/eal_common_hexdump.c120
-rw-r--r--lib/librte_eal/common/eal_common_launch.c118
-rw-r--r--lib/librte_eal/common/eal_common_lcore.c110
-rw-r--r--lib/librte_eal/common/eal_common_log.c337
-rw-r--r--lib/librte_eal/common/eal_common_memory.c154
-rw-r--r--lib/librte_eal/common/eal_common_memzone.c445
-rw-r--r--lib/librte_eal/common/eal_common_options.c1022
-rw-r--r--lib/librte_eal/common/eal_common_pci.c457
-rw-r--r--lib/librte_eal/common/eal_common_pci_uio.c222
-rw-r--r--lib/librte_eal/common/eal_common_proc.c61
-rw-r--r--lib/librte_eal/common/eal_common_string_fns.c69
-rw-r--r--lib/librte_eal/common/eal_common_tailqs.c202
-rw-r--r--lib/librte_eal/common/eal_common_thread.c157
-rw-r--r--lib/librte_eal/common/eal_common_timer.c86
-rw-r--r--lib/librte_eal/common/eal_filesystem.h118
-rw-r--r--lib/librte_eal/common/eal_hugepages.h67
-rw-r--r--lib/librte_eal/common/eal_internal_cfg.h94
-rw-r--r--lib/librte_eal/common/eal_options.h100
-rw-r--r--lib/librte_eal/common/eal_private.h331
-rw-r--r--lib/librte_eal/common/eal_thread.h100
-rw-r--r--lib/librte_eal/common/include/arch/arm/rte_atomic.h48
-rw-r--r--lib/librte_eal/common/include/arch/arm/rte_atomic_32.h74
-rw-r--r--lib/librte_eal/common/include/arch/arm/rte_atomic_64.h88
-rw-r--r--lib/librte_eal/common/include/arch/arm/rte_byteorder.h107
-rw-r--r--lib/librte_eal/common/include/arch/arm/rte_cpuflags.h42
-rw-r--r--lib/librte_eal/common/include/arch/arm/rte_cpuflags_32.h82
-rw-r--r--lib/librte_eal/common/include/arch/arm/rte_cpuflags_64.h64
-rw-r--r--lib/librte_eal/common/include/arch/arm/rte_cycles.h42
-rw-r--r--lib/librte_eal/common/include/arch/arm/rte_cycles_32.h121
-rw-r--r--lib/librte_eal/common/include/arch/arm/rte_cycles_64.h71
-rw-r--r--lib/librte_eal/common/include/arch/arm/rte_memcpy.h42
-rw-r--r--lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h338
-rw-r--r--lib/librte_eal/common/include/arch/arm/rte_memcpy_64.h93
-rw-r--r--lib/librte_eal/common/include/arch/arm/rte_prefetch.h42
-rw-r--r--lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h67
-rw-r--r--lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h66
-rw-r--r--lib/librte_eal/common/include/arch/arm/rte_rwlock.h40
-rw-r--r--lib/librte_eal/common/include/arch/arm/rte_spinlock.h92
-rw-r--r--lib/librte_eal/common/include/arch/arm/rte_vect.h83
-rw-r--r--lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h432
-rw-r--r--lib/librte_eal/common/include/arch/ppc_64/rte_byteorder.h149
-rw-r--r--lib/librte_eal/common/include/arch/ppc_64/rte_cpuflags.h88
-rw-r--r--lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h94
-rw-r--r--lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h225
-rw-r--r--lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h67
-rw-r--r--lib/librte_eal/common/include/arch/ppc_64/rte_rwlock.h38
-rw-r--r--lib/librte_eal/common/include/arch/ppc_64/rte_spinlock.h114
-rw-r--r--lib/librte_eal/common/include/arch/tile/rte_atomic.h92
-rw-r--r--lib/librte_eal/common/include/arch/tile/rte_byteorder.h91
-rw-r--r--lib/librte_eal/common/include/arch/tile/rte_cpuflags.h53
-rw-r--r--lib/librte_eal/common/include/arch/tile/rte_cycles.h70
-rw-r--r--lib/librte_eal/common/include/arch/tile/rte_memcpy.h93
-rw-r--r--lib/librte_eal/common/include/arch/tile/rte_prefetch.h67
-rw-r--r--lib/librte_eal/common/include/arch/tile/rte_rwlock.h70
-rw-r--r--lib/librte_eal/common/include/arch/tile/rte_spinlock.h92
-rw-r--r--lib/librte_eal/common/include/arch/x86/rte_atomic.h222
-rw-r--r--lib/librte_eal/common/include/arch/x86/rte_atomic_32.h222
-rw-r--r--lib/librte_eal/common/include/arch/x86/rte_atomic_64.h191
-rw-r--r--lib/librte_eal/common/include/arch/x86/rte_byteorder.h125
-rw-r--r--lib/librte_eal/common/include/arch/x86/rte_byteorder_32.h51
-rw-r--r--lib/librte_eal/common/include/arch/x86/rte_byteorder_64.h52
-rw-r--r--lib/librte_eal/common/include/arch/x86/rte_cpuflags.h153
-rw-r--r--lib/librte_eal/common/include/arch/x86/rte_cycles.h121
-rw-r--r--lib/librte_eal/common/include/arch/x86/rte_memcpy.h884
-rw-r--r--lib/librte_eal/common/include/arch/x86/rte_prefetch.h67
-rw-r--r--lib/librte_eal/common/include/arch/x86/rte_rtm.h73
-rw-r--r--lib/librte_eal/common/include/arch/x86/rte_rwlock.h82
-rw-r--r--lib/librte_eal/common/include/arch/x86/rte_spinlock.h201
-rw-r--r--lib/librte_eal/common/include/arch/x86/rte_vect.h132
-rw-r--r--lib/librte_eal/common/include/generic/rte_atomic.h945
-rw-r--r--lib/librte_eal/common/include/generic/rte_byteorder.h217
-rw-r--r--lib/librte_eal/common/include/generic/rte_cpuflags.h82
-rw-r--r--lib/librte_eal/common/include/generic/rte_cycles.h205
-rw-r--r--lib/librte_eal/common/include/generic/rte_memcpy.h144
-rw-r--r--lib/librte_eal/common/include/generic/rte_prefetch.h83
-rw-r--r--lib/librte_eal/common/include/generic/rte_rwlock.h208
-rw-r--r--lib/librte_eal/common/include/generic/rte_spinlock.h325
-rw-r--r--lib/librte_eal/common/include/rte_alarm.h106
-rw-r--r--lib/librte_eal/common/include/rte_branch_prediction.h70
-rw-r--r--lib/librte_eal/common/include/rte_common.h401
-rw-r--r--lib/librte_eal/common/include/rte_debug.h103
-rw-r--r--lib/librte_eal/common/include/rte_dev.h192
-rw-r--r--lib/librte_eal/common/include/rte_devargs.h177
-rw-r--r--lib/librte_eal/common/include/rte_eal.h259
-rw-r--r--lib/librte_eal/common/include/rte_eal_memconfig.h100
-rw-r--r--lib/librte_eal/common/include/rte_errno.h95
-rw-r--r--lib/librte_eal/common/include/rte_hexdump.h89
-rw-r--r--lib/librte_eal/common/include/rte_interrupts.h120
-rw-r--r--lib/librte_eal/common/include/rte_keepalive.h108
-rw-r--r--lib/librte_eal/common/include/rte_launch.h177
-rw-r--r--lib/librte_eal/common/include/rte_lcore.h276
-rw-r--r--lib/librte_eal/common/include/rte_log.h311
-rw-r--r--lib/librte_eal/common/include/rte_malloc.h342
-rw-r--r--lib/librte_eal/common/include/rte_malloc_heap.h55
-rw-r--r--lib/librte_eal/common/include/rte_memory.h263
-rw-r--r--lib/librte_eal/common/include/rte_memzone.h305
-rw-r--r--lib/librte_eal/common/include/rte_pci.h598
-rw-r--r--lib/librte_eal/common/include/rte_pci_dev_feature_defs.h70
-rw-r--r--lib/librte_eal/common/include/rte_pci_dev_features.h69
-rw-r--r--lib/librte_eal/common/include/rte_pci_dev_ids.h704
-rw-r--r--lib/librte_eal/common/include/rte_per_lcore.h79
-rw-r--r--lib/librte_eal/common/include/rte_random.h91
-rw-r--r--lib/librte_eal/common/include/rte_string_fns.h81
-rw-r--r--lib/librte_eal/common/include/rte_tailq.h162
-rw-r--r--lib/librte_eal/common/include/rte_time.h122
-rw-r--r--lib/librte_eal/common/include/rte_version.h130
-rw-r--r--lib/librte_eal/common/include/rte_warnings.h84
-rw-r--r--lib/librte_eal/common/malloc_elem.c344
-rw-r--r--lib/librte_eal/common/malloc_elem.h192
-rw-r--r--lib/librte_eal/common/malloc_heap.c236
-rw-r--r--lib/librte_eal/common/malloc_heap.h70
-rw-r--r--lib/librte_eal/common/rte_keepalive.c149
-rw-r--r--lib/librte_eal/common/rte_malloc.c262
-rw-r--r--lib/librte_eal/linuxapp/Makefile39
-rw-r--r--lib/librte_eal/linuxapp/eal/Makefile139
-rw-r--r--lib/librte_eal/linuxapp/eal/eal.c936
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_alarm.c273
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_debug.c119
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_hugepage_info.c365
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_interrupts.c1224
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_ivshmem.c955
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_lcore.c110
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_log.c146
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_memory.c1559
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_pci.c762
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_pci_init.h127
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_pci_uio.c491
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_pci_vfio.c1097
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_pci_vfio_mp_sync.c405
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_thread.c199
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_timer.c305
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_vfio.h67
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_xen_memory.c369
-rw-r--r--lib/librte_eal/linuxapp/eal/include/exec-env/rte_dom0_common.h108
-rw-r--r--lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h228
-rw-r--r--lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h172
-rw-r--r--lib/librte_eal/linuxapp/eal/rte_eal_version.map156
-rw-r--r--lib/librte_eal/linuxapp/igb_uio/Makefile53
-rw-r--r--lib/librte_eal/linuxapp/igb_uio/compat.h116
-rw-r--r--lib/librte_eal/linuxapp/igb_uio/igb_uio.c580
-rw-r--r--lib/librte_eal/linuxapp/kni/Makefile93
-rw-r--r--lib/librte_eal/linuxapp/kni/compat.h29
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/README100
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/COPYING339
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c3665
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.h509
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.c1159
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.h157
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_defines.h1380
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_hw.h793
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.c909
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.h91
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.c2096
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.h80
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.c554
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.h89
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.c525
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.h87
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.c965
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.h75
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_osdep.h136
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c3405
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.h256
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_regs.h646
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/igb.h859
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/igb_debugfs.c28
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c2857
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/igb_hwmon.c260
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c10291
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/igb_param.c847
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/igb_procfs.c363
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ptp.c944
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/igb_regtest.h249
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.c436
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.h46
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.c1482
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h3918
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat_ethtool.c1171
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/COPYING339
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe.h925
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.c1296
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.h44
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c2313
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.h58
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.c1157
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.h168
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.c4082
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.h140
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_dcb.h168
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c2901
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_fcoe.h91
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c2970
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_mbx.h105
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_osdep.h132
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.c1847
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.h137
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_sriov.h73
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_type.h3254
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.c937
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.h58
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.c1246
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.h3143
-rw-r--r--lib/librte_eal/linuxapp/kni/kni_dev.h149
-rw-r--r--lib/librte_eal/linuxapp/kni/kni_ethtool.c217
-rw-r--r--lib/librte_eal/linuxapp/kni/kni_fifo.h108
-rw-r--r--lib/librte_eal/linuxapp/kni/kni_misc.c688
-rw-r--r--lib/librte_eal/linuxapp/kni/kni_net.c706
-rw-r--r--lib/librte_eal/linuxapp/kni/kni_vhost.c857
-rw-r--r--lib/librte_eal/linuxapp/xen_dom0/Makefile56
-rw-r--r--lib/librte_eal/linuxapp/xen_dom0/compat.h15
-rw-r--r--lib/librte_eal/linuxapp/xen_dom0/dom0_mm_dev.h107
-rw-r--r--lib/librte_eal/linuxapp/xen_dom0/dom0_mm_misc.c780
244 files changed, 108631 insertions, 0 deletions
diff --git a/lib/librte_eal/Makefile b/lib/librte_eal/Makefile
new file mode 100644
index 00000000..cf11a099
--- /dev/null
+++ b/lib/librte_eal/Makefile
@@ -0,0 +1,38 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+DIRS-y += common
+DIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += linuxapp
+DIRS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += bsdapp
+
+include $(RTE_SDK)/mk/rte.subdir.mk
diff --git a/lib/librte_eal/bsdapp/Makefile b/lib/librte_eal/bsdapp/Makefile
new file mode 100644
index 00000000..0e6e2be9
--- /dev/null
+++ b/lib/librte_eal/bsdapp/Makefile
@@ -0,0 +1,38 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+DIRS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal
+DIRS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += contigmem
+DIRS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += nic_uio
+
+include $(RTE_SDK)/mk/rte.subdir.mk
diff --git a/lib/librte_eal/bsdapp/contigmem/BSDmakefile b/lib/librte_eal/bsdapp/contigmem/BSDmakefile
new file mode 100644
index 00000000..f64374c6
--- /dev/null
+++ b/lib/librte_eal/bsdapp/contigmem/BSDmakefile
@@ -0,0 +1,36 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+KMOD= contigmem
+SRCS= contigmem.c device_if.h bus_if.h
+
+.include <bsd.kmod.mk>
diff --git a/lib/librte_eal/bsdapp/contigmem/Makefile b/lib/librte_eal/bsdapp/contigmem/Makefile
new file mode 100644
index 00000000..bab005fd
--- /dev/null
+++ b/lib/librte_eal/bsdapp/contigmem/Makefile
@@ -0,0 +1,52 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# module name and path
+#
+MODULE = contigmem
+
+#
+# CFLAGS
+#
+MODULE_CFLAGS += -I$(SRCDIR)
+MODULE_CFLAGS += -I$(RTE_OUTPUT)/include
+MODULE_CFLAGS += -Winline -Wall -Werror
+MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-y := contigmem.c
+
+include $(RTE_SDK)/mk/rte.bsdmodule.mk
diff --git a/lib/librte_eal/bsdapp/contigmem/contigmem.c b/lib/librte_eal/bsdapp/contigmem/contigmem.c
new file mode 100644
index 00000000..c6ca3b9c
--- /dev/null
+++ b/lib/librte_eal/bsdapp/contigmem/contigmem.c
@@ -0,0 +1,232 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/bio.h>
+#include <sys/bus.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/proc.h>
+#include <sys/rwlock.h>
+#include <sys/systm.h>
+#include <sys/sysctl.h>
+
+#include <machine/bus.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pager.h>
+
+static int contigmem_load(void);
+static int contigmem_unload(void);
+static int contigmem_physaddr(SYSCTL_HANDLER_ARGS);
+
+static d_mmap_t contigmem_mmap;
+static d_mmap_single_t contigmem_mmap_single;
+static d_open_t contigmem_open;
+
+static int contigmem_num_buffers = RTE_CONTIGMEM_DEFAULT_NUM_BUFS;
+static int64_t contigmem_buffer_size = RTE_CONTIGMEM_DEFAULT_BUF_SIZE;
+
+static eventhandler_tag contigmem_eh_tag;
+static void *contigmem_buffers[RTE_CONTIGMEM_MAX_NUM_BUFS];
+static struct cdev *contigmem_cdev = NULL;
+
+TUNABLE_INT("hw.contigmem.num_buffers", &contigmem_num_buffers);
+TUNABLE_QUAD("hw.contigmem.buffer_size", &contigmem_buffer_size);
+
+static SYSCTL_NODE(_hw, OID_AUTO, contigmem, CTLFLAG_RD, 0, "contigmem");
+
+SYSCTL_INT(_hw_contigmem, OID_AUTO, num_buffers, CTLFLAG_RD,
+ &contigmem_num_buffers, 0, "Number of contigmem buffers allocated");
+SYSCTL_QUAD(_hw_contigmem, OID_AUTO, buffer_size, CTLFLAG_RD,
+ &contigmem_buffer_size, 0, "Size of each contiguous buffer");
+
+static SYSCTL_NODE(_hw_contigmem, OID_AUTO, physaddr, CTLFLAG_RD, 0,
+ "physaddr");
+
+MALLOC_DEFINE(M_CONTIGMEM, "contigmem", "contigmem(4) allocations");
+
+static int contigmem_modevent(module_t mod, int type, void *arg)
+{
+ int error = 0;
+
+ switch (type) {
+ case MOD_LOAD:
+ error = contigmem_load();
+ break;
+ case MOD_UNLOAD:
+ error = contigmem_unload();
+ break;
+ default:
+ break;
+ }
+
+ return error;
+}
+
+moduledata_t contigmem_mod = {
+ "contigmem",
+ (modeventhand_t)contigmem_modevent,
+ 0
+};
+
+DECLARE_MODULE(contigmem, contigmem_mod, SI_SUB_DRIVERS, SI_ORDER_ANY);
+MODULE_VERSION(contigmem, 1);
+
+static struct cdevsw contigmem_ops = {
+ .d_name = "contigmem",
+ .d_version = D_VERSION,
+ .d_mmap = contigmem_mmap,
+ .d_mmap_single = contigmem_mmap_single,
+ .d_open = contigmem_open,
+};
+
+static int
+contigmem_load()
+{
+ char index_string[8], description[32];
+ int i;
+
+ if (contigmem_num_buffers > RTE_CONTIGMEM_MAX_NUM_BUFS) {
+ printf("%d buffers requested is greater than %d allowed\n",
+ contigmem_num_buffers, RTE_CONTIGMEM_MAX_NUM_BUFS);
+ return EINVAL;
+ }
+
+ if (contigmem_buffer_size < PAGE_SIZE ||
+ (contigmem_buffer_size & (contigmem_buffer_size - 1)) != 0) {
+ printf("buffer size 0x%lx is not greater than PAGE_SIZE and "
+ "power of two\n", contigmem_buffer_size);
+ return EINVAL;
+ }
+
+ for (i = 0; i < contigmem_num_buffers; i++) {
+ contigmem_buffers[i] =
+ contigmalloc(contigmem_buffer_size, M_CONTIGMEM, M_ZERO, 0,
+ BUS_SPACE_MAXADDR, contigmem_buffer_size, 0);
+
+ if (contigmem_buffers[i] == NULL) {
+ printf("contigmalloc failed for buffer %d\n", i);
+ return ENOMEM;
+ }
+
+ printf("%2u: virt=%p phys=%p\n", i, contigmem_buffers[i],
+ (void *)pmap_kextract((vm_offset_t)contigmem_buffers[i]));
+
+ snprintf(index_string, sizeof(index_string), "%d", i);
+ snprintf(description, sizeof(description),
+ "phys addr for buffer %d", i);
+ SYSCTL_ADD_PROC(NULL,
+ &SYSCTL_NODE_CHILDREN(_hw_contigmem, physaddr), OID_AUTO,
+ index_string, CTLTYPE_U64 | CTLFLAG_RD,
+ (void *)(uintptr_t)i, 0, contigmem_physaddr, "LU",
+ description);
+ }
+
+ contigmem_cdev = make_dev_credf(0, &contigmem_ops, 0, NULL, UID_ROOT,
+ GID_WHEEL, 0600, "contigmem");
+
+ return 0;
+}
+
+static int
+contigmem_unload()
+{
+ int i;
+
+ if (contigmem_cdev != NULL)
+ destroy_dev(contigmem_cdev);
+
+ if (contigmem_eh_tag != NULL)
+ EVENTHANDLER_DEREGISTER(process_exit, contigmem_eh_tag);
+
+ for (i = 0; i < RTE_CONTIGMEM_MAX_NUM_BUFS; i++)
+ if (contigmem_buffers[i] != NULL)
+ contigfree(contigmem_buffers[i], contigmem_buffer_size,
+ M_CONTIGMEM);
+
+ return 0;
+}
+
+static int
+contigmem_physaddr(SYSCTL_HANDLER_ARGS)
+{
+ uint64_t physaddr;
+ int index = (int)(uintptr_t)arg1;
+
+ physaddr = (uint64_t)vtophys(contigmem_buffers[index]);
+ return sysctl_handle_64(oidp, &physaddr, 0, req);
+}
+
+static int
+contigmem_open(struct cdev *cdev, int fflags, int devtype,
+ struct thread *td)
+{
+ return 0;
+}
+
+static int
+contigmem_mmap(struct cdev *cdev, vm_ooffset_t offset, vm_paddr_t *paddr,
+ int prot, vm_memattr_t *memattr)
+{
+
+ *paddr = offset;
+ return 0;
+}
+
+static int
+contigmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size,
+ struct vm_object **obj, int nprot)
+{
+ /*
+ * The buffer index is encoded in the offset. Divide the offset by
+ * PAGE_SIZE to get the index of the buffer requested by the user
+ * app.
+ */
+ if ((*offset/PAGE_SIZE) >= contigmem_num_buffers)
+ return EINVAL;
+
+ *offset = (vm_ooffset_t)vtophys(contigmem_buffers[*offset/PAGE_SIZE]);
+ *obj = vm_pager_allocate(OBJT_DEVICE, cdev, size, nprot, *offset,
+ curthread->td_ucred);
+
+ return 0;
+}
diff --git a/lib/librte_eal/bsdapp/eal/Makefile b/lib/librte_eal/bsdapp/eal/Makefile
new file mode 100644
index 00000000..9054ad61
--- /dev/null
+++ b/lib/librte_eal/bsdapp/eal/Makefile
@@ -0,0 +1,117 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+LIB = librte_eal.a
+
+ARCH_DIR ?= $(RTE_ARCH)
+VPATH += $(RTE_SDK)/lib/librte_eal/common
+VPATH += $(RTE_SDK)/lib/librte_eal/common/arch/$(ARCH_DIR)
+
+CFLAGS += -I$(SRCDIR)/include
+CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common
+CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common/include
+CFLAGS += -I$(RTE_SDK)/lib/librte_ring
+CFLAGS += -I$(RTE_SDK)/lib/librte_mempool
+CFLAGS += $(WERROR_FLAGS) -O3
+
+LDLIBS += -lexecinfo
+LDLIBS += -lpthread
+LDLIBS += -lgcc_s
+
+EXPORT_MAP := rte_eal_version.map
+
+LIBABIVER := 2
+
+# specific to linuxapp exec-env
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) := eal.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_memory.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_hugepage_info.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_thread.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_log.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_pci.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_debug.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_lcore.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_timer.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_interrupts.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_alarm.c
+
+# from common dir
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_lcore.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_timer.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_memzone.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_log.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_launch.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_pci.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_pci_uio.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_memory.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_tailqs.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_errno.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_cpuflags.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_string_fns.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_hexdump.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_devargs.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_dev.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_options.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_thread.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_proc.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_malloc.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_elem.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_heap.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_keepalive.c
+
+# from arch dir
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_cpuflags.c
+
+CFLAGS_eal_common_cpuflags.o := $(CPUFLAGS_LIST)
+
+CFLAGS_eal.o := -D_GNU_SOURCE
+#CFLAGS_eal_thread.o := -D_GNU_SOURCE
+CFLAGS_eal_log.o := -D_GNU_SOURCE
+CFLAGS_eal_common_log.o := -D_GNU_SOURCE
+
+# workaround for a gcc bug with noreturn attribute
+# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
+ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
+CFLAGS_eal_thread.o += -Wno-return-type
+CFLAGS_eal_hpet.o += -Wno-return-type
+endif
+
+INC := rte_interrupts.h
+
+SYMLINK-$(CONFIG_RTE_EXEC_ENV_BSDAPP)-include/exec-env := \
+ $(addprefix include/exec-env/,$(INC))
+
+DEPDIRS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += lib/librte_eal/common
+DEPDIRS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += lib/librte_eal/common/arch/$(ARCH_DIR)
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c
new file mode 100644
index 00000000..06bfd4e0
--- /dev/null
+++ b/lib/librte_eal/bsdapp/eal/eal.c
@@ -0,0 +1,638 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ * Copyright(c) 2014 6WIND S.A.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <syslog.h>
+#include <getopt.h>
+#include <sys/file.h>
+#include <stddef.h>
+#include <errno.h>
+#include <limits.h>
+#include <errno.h>
+#include <sys/mman.h>
+#include <sys/queue.h>
+
+#include <rte_common.h>
+#include <rte_debug.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_launch.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_log.h>
+#include <rte_random.h>
+#include <rte_cycles.h>
+#include <rte_string_fns.h>
+#include <rte_cpuflags.h>
+#include <rte_interrupts.h>
+#include <rte_pci.h>
+#include <rte_dev.h>
+#include <rte_devargs.h>
+#include <rte_common.h>
+#include <rte_version.h>
+#include <rte_atomic.h>
+#include <malloc_heap.h>
+
+#include "eal_private.h"
+#include "eal_thread.h"
+#include "eal_internal_cfg.h"
+#include "eal_filesystem.h"
+#include "eal_hugepages.h"
+#include "eal_options.h"
+
+#define MEMSIZE_IF_NO_HUGE_PAGE (64ULL * 1024ULL * 1024ULL)
+
+/* Allow the application to print its usage message too if set */
+static rte_usage_hook_t rte_application_usage_hook = NULL;
+/* early configuration structure, when memory config is not mmapped */
+static struct rte_mem_config early_mem_config;
+
+/* define fd variable here, because file needs to be kept open for the
+ * duration of the program, as we hold a write lock on it in the primary proc */
+static int mem_cfg_fd = -1;
+
+static struct flock wr_lock = {
+ .l_type = F_WRLCK,
+ .l_whence = SEEK_SET,
+ .l_start = offsetof(struct rte_mem_config, memseg),
+ .l_len = sizeof(early_mem_config.memseg),
+};
+
+/* Address of global and public configuration */
+static struct rte_config rte_config = {
+ .mem_config = &early_mem_config,
+};
+
+/* internal configuration (per-core) */
+struct lcore_config lcore_config[RTE_MAX_LCORE];
+
+/* internal configuration */
+struct internal_config internal_config;
+
+/* used by rte_rdtsc() */
+int rte_cycles_vmware_tsc_map;
+
+/* Return a pointer to the configuration structure */
+struct rte_config *
+rte_eal_get_configuration(void)
+{
+ return &rte_config;
+}
+
+/* parse a sysfs (or other) file containing one integer value */
+int
+eal_parse_sysfs_value(const char *filename, unsigned long *val)
+{
+ FILE *f;
+ char buf[BUFSIZ];
+ char *end = NULL;
+
+ if ((f = fopen(filename, "r")) == NULL) {
+ RTE_LOG(ERR, EAL, "%s(): cannot open sysfs value %s\n",
+ __func__, filename);
+ return -1;
+ }
+
+ if (fgets(buf, sizeof(buf), f) == NULL) {
+ RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n",
+ __func__, filename);
+ fclose(f);
+ return -1;
+ }
+ *val = strtoul(buf, &end, 0);
+ if ((buf[0] == '\0') || (end == NULL) || (*end != '\n')) {
+ RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs value %s\n",
+ __func__, filename);
+ fclose(f);
+ return -1;
+ }
+ fclose(f);
+ return 0;
+}
+
+
+/* create memory configuration in shared/mmap memory. Take out
+ * a write lock on the memsegs, so we can auto-detect primary/secondary.
+ * This means we never close the file while running (auto-close on exit).
+ * We also don't lock the whole file, so that in future we can use read-locks
+ * on other parts, e.g. memzones, to detect if there are running secondary
+ * processes. */
+static void
+rte_eal_config_create(void)
+{
+ void *rte_mem_cfg_addr;
+ int retval;
+
+ const char *pathname = eal_runtime_config_path();
+
+ if (internal_config.no_shconf)
+ return;
+
+ if (mem_cfg_fd < 0){
+ mem_cfg_fd = open(pathname, O_RDWR | O_CREAT, 0660);
+ if (mem_cfg_fd < 0)
+ rte_panic("Cannot open '%s' for rte_mem_config\n", pathname);
+ }
+
+ retval = ftruncate(mem_cfg_fd, sizeof(*rte_config.mem_config));
+ if (retval < 0){
+ close(mem_cfg_fd);
+ rte_panic("Cannot resize '%s' for rte_mem_config\n", pathname);
+ }
+
+ retval = fcntl(mem_cfg_fd, F_SETLK, &wr_lock);
+ if (retval < 0){
+ close(mem_cfg_fd);
+ rte_exit(EXIT_FAILURE, "Cannot create lock on '%s'. Is another primary "
+ "process running?\n", pathname);
+ }
+
+ rte_mem_cfg_addr = mmap(NULL, sizeof(*rte_config.mem_config),
+ PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0);
+
+ if (rte_mem_cfg_addr == MAP_FAILED){
+ rte_panic("Cannot mmap memory for rte_config\n");
+ }
+ memcpy(rte_mem_cfg_addr, &early_mem_config, sizeof(early_mem_config));
+ rte_config.mem_config = (struct rte_mem_config *) rte_mem_cfg_addr;
+}
+
+/* attach to an existing shared memory config */
+static void
+rte_eal_config_attach(void)
+{
+ void *rte_mem_cfg_addr;
+ const char *pathname = eal_runtime_config_path();
+
+ if (internal_config.no_shconf)
+ return;
+
+ if (mem_cfg_fd < 0){
+ mem_cfg_fd = open(pathname, O_RDWR);
+ if (mem_cfg_fd < 0)
+ rte_panic("Cannot open '%s' for rte_mem_config\n", pathname);
+ }
+
+ rte_mem_cfg_addr = mmap(NULL, sizeof(*rte_config.mem_config),
+ PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0);
+ close(mem_cfg_fd);
+ if (rte_mem_cfg_addr == MAP_FAILED)
+ rte_panic("Cannot mmap memory for rte_config\n");
+
+ rte_config.mem_config = (struct rte_mem_config *) rte_mem_cfg_addr;
+}
+
+/* Detect if we are a primary or a secondary process */
+enum rte_proc_type_t
+eal_proc_type_detect(void)
+{
+ enum rte_proc_type_t ptype = RTE_PROC_PRIMARY;
+ const char *pathname = eal_runtime_config_path();
+
+ /* if we can open the file but not get a write-lock we are a secondary
+ * process. NOTE: if we get a file handle back, we keep that open
+ * and don't close it to prevent a race condition between multiple opens */
+ if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) &&
+ (fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0))
+ ptype = RTE_PROC_SECONDARY;
+
+ RTE_LOG(INFO, EAL, "Auto-detected process type: %s\n",
+ ptype == RTE_PROC_PRIMARY ? "PRIMARY" : "SECONDARY");
+
+ return ptype;
+}
+
+/* Sets up rte_config structure with the pointer to shared memory config.*/
+static void
+rte_config_init(void)
+{
+ rte_config.process_type = internal_config.process_type;
+
+ switch (rte_config.process_type){
+ case RTE_PROC_PRIMARY:
+ rte_eal_config_create();
+ break;
+ case RTE_PROC_SECONDARY:
+ rte_eal_config_attach();
+ rte_eal_mcfg_wait_complete(rte_config.mem_config);
+ break;
+ case RTE_PROC_AUTO:
+ case RTE_PROC_INVALID:
+ rte_panic("Invalid process type\n");
+ }
+}
+
+/* display usage */
+static void
+eal_usage(const char *prgname)
+{
+ printf("\nUsage: %s ", prgname);
+ eal_common_usage();
+ /* Allow the application to print its usage message too if hook is set */
+ if ( rte_application_usage_hook ) {
+ printf("===== Application Usage =====\n\n");
+ rte_application_usage_hook(prgname);
+ }
+}
+
+/* Set a per-application usage message */
+rte_usage_hook_t
+rte_set_application_usage_hook( rte_usage_hook_t usage_func )
+{
+ rte_usage_hook_t old_func;
+
+ /* Will be NULL on the first call to denote the last usage routine. */
+ old_func = rte_application_usage_hook;
+ rte_application_usage_hook = usage_func;
+
+ return old_func;
+}
+
+static inline size_t
+eal_get_hugepage_mem_size(void)
+{
+ uint64_t size = 0;
+ unsigned i, j;
+
+ for (i = 0; i < internal_config.num_hugepage_sizes; i++) {
+ struct hugepage_info *hpi = &internal_config.hugepage_info[i];
+ if (hpi->hugedir != NULL) {
+ for (j = 0; j < RTE_MAX_NUMA_NODES; j++) {
+ size += hpi->hugepage_sz * hpi->num_pages[j];
+ }
+ }
+ }
+
+ return (size < SIZE_MAX) ? (size_t)(size) : SIZE_MAX;
+}
+
+/* Parse the arguments for --log-level only */
+static void
+eal_log_level_parse(int argc, char **argv)
+{
+ int opt;
+ char **argvopt;
+ int option_index;
+ const int old_optind = optind;
+ const int old_optopt = optopt;
+ const int old_optreset = optreset;
+ char * const old_optarg = optarg;
+
+ argvopt = argv;
+ optind = 1;
+ optreset = 1;
+
+ eal_reset_internal_config(&internal_config);
+
+ while ((opt = getopt_long(argc, argvopt, eal_short_options,
+ eal_long_options, &option_index)) != EOF) {
+
+ int ret;
+
+ /* getopt is not happy, stop right now */
+ if (opt == '?')
+ break;
+
+ ret = (opt == OPT_LOG_LEVEL_NUM) ?
+ eal_parse_common_option(opt, optarg, &internal_config) : 0;
+
+ /* common parser is not happy */
+ if (ret < 0)
+ break;
+ }
+
+ /* restore getopt lib */
+ optind = old_optind;
+ optopt = old_optopt;
+ optreset = old_optreset;
+ optarg = old_optarg;
+}
+
+/* Parse the argument given in the command line of the application */
+static int
+eal_parse_args(int argc, char **argv)
+{
+ int opt, ret;
+ char **argvopt;
+ int option_index;
+ char *prgname = argv[0];
+ const int old_optind = optind;
+ const int old_optopt = optopt;
+ const int old_optreset = optreset;
+ char * const old_optarg = optarg;
+
+ argvopt = argv;
+ optind = 1;
+ optreset = 1;
+
+ while ((opt = getopt_long(argc, argvopt, eal_short_options,
+ eal_long_options, &option_index)) != EOF) {
+
+ /* getopt is not happy, stop right now */
+ if (opt == '?') {
+ eal_usage(prgname);
+ ret = -1;
+ goto out;
+ }
+
+ ret = eal_parse_common_option(opt, optarg, &internal_config);
+ /* common parser is not happy */
+ if (ret < 0) {
+ eal_usage(prgname);
+ ret = -1;
+ goto out;
+ }
+ /* common parser handled this option */
+ if (ret == 0)
+ continue;
+
+ switch (opt) {
+ case 'h':
+ eal_usage(prgname);
+ exit(EXIT_SUCCESS);
+ default:
+ if (opt < OPT_LONG_MIN_NUM && isprint(opt)) {
+ RTE_LOG(ERR, EAL, "Option %c is not supported "
+ "on FreeBSD\n", opt);
+ } else if (opt >= OPT_LONG_MIN_NUM &&
+ opt < OPT_LONG_MAX_NUM) {
+ RTE_LOG(ERR, EAL, "Option %s is not supported "
+ "on FreeBSD\n",
+ eal_long_options[option_index].name);
+ } else {
+ RTE_LOG(ERR, EAL, "Option %d is not supported "
+ "on FreeBSD\n", opt);
+ }
+ eal_usage(prgname);
+ ret = -1;
+ goto out;
+ }
+ }
+
+ if (eal_adjust_config(&internal_config) != 0) {
+ ret = -1;
+ goto out;
+ }
+
+ /* sanity checks */
+ if (eal_check_common_options(&internal_config) != 0) {
+ eal_usage(prgname);
+ ret = -1;
+ goto out;
+ }
+
+ if (optind >= 0)
+ argv[optind-1] = prgname;
+ ret = optind-1;
+
+out:
+ /* restore getopt lib */
+ optind = old_optind;
+ optopt = old_optopt;
+ optreset = old_optreset;
+ optarg = old_optarg;
+
+ return ret;
+}
+
+static void
+eal_check_mem_on_local_socket(void)
+{
+ const struct rte_memseg *ms;
+ int i, socket_id;
+
+ socket_id = rte_lcore_to_socket_id(rte_config.master_lcore);
+
+ ms = rte_eal_get_physmem_layout();
+
+ for (i = 0; i < RTE_MAX_MEMSEG; i++)
+ if (ms[i].socket_id == socket_id &&
+ ms[i].len > 0)
+ return;
+
+ RTE_LOG(WARNING, EAL, "WARNING: Master core has no "
+ "memory on local socket!\n");
+}
+
+static int
+sync_func(__attribute__((unused)) void *arg)
+{
+ return 0;
+}
+
+inline static void
+rte_eal_mcfg_complete(void)
+{
+ /* ALL shared mem_config related INIT DONE */
+ if (rte_config.process_type == RTE_PROC_PRIMARY)
+ rte_config.mem_config->magic = RTE_MAGIC;
+}
+
+/* return non-zero if hugepages are enabled. */
+int rte_eal_has_hugepages(void)
+{
+ return !internal_config.no_hugetlbfs;
+}
+
+/* Abstraction for port I/0 privilege */
+int
+rte_eal_iopl_init(void)
+{
+ static int fd;
+
+ fd = open("/dev/io", O_RDWR);
+ if (fd < 0)
+ return -1;
+ /* keep fd open for iopl */
+ return 0;
+}
+
+/* Launch threads, called at application init(). */
+int
+rte_eal_init(int argc, char **argv)
+{
+ int i, fctret, ret;
+ pthread_t thread_id;
+ static rte_atomic32_t run_once = RTE_ATOMIC32_INIT(0);
+ char cpuset[RTE_CPU_AFFINITY_STR_LEN];
+ char thread_name[RTE_MAX_THREAD_NAME_LEN];
+
+ if (!rte_atomic32_test_and_set(&run_once))
+ return -1;
+
+ thread_id = pthread_self();
+
+ if (rte_eal_log_early_init() < 0)
+ rte_panic("Cannot init early logs\n");
+
+ eal_log_level_parse(argc, argv);
+
+ /* set log level as early as possible */
+ rte_set_log_level(internal_config.log_level);
+
+ if (rte_eal_cpu_init() < 0)
+ rte_panic("Cannot detect lcores\n");
+
+ fctret = eal_parse_args(argc, argv);
+ if (fctret < 0)
+ exit(1);
+
+ if (internal_config.no_hugetlbfs == 0 &&
+ internal_config.process_type != RTE_PROC_SECONDARY &&
+ eal_hugepage_info_init() < 0)
+ rte_panic("Cannot get hugepage information\n");
+
+ if (internal_config.memory == 0 && internal_config.force_sockets == 0) {
+ if (internal_config.no_hugetlbfs)
+ internal_config.memory = MEMSIZE_IF_NO_HUGE_PAGE;
+ else
+ internal_config.memory = eal_get_hugepage_mem_size();
+ }
+
+ if (internal_config.vmware_tsc_map == 1) {
+#ifdef RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT
+ rte_cycles_vmware_tsc_map = 1;
+ RTE_LOG (DEBUG, EAL, "Using VMWARE TSC MAP, "
+ "you must have monitor_control.pseudo_perfctr = TRUE\n");
+#else
+ RTE_LOG (WARNING, EAL, "Ignoring --vmware-tsc-map because "
+ "RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT is not set\n");
+#endif
+ }
+
+ rte_srand(rte_rdtsc());
+
+ rte_config_init();
+
+ if (rte_eal_memory_init() < 0)
+ rte_panic("Cannot init memory\n");
+
+ if (rte_eal_memzone_init() < 0)
+ rte_panic("Cannot init memzone\n");
+
+ if (rte_eal_tailqs_init() < 0)
+ rte_panic("Cannot init tail queues for objects\n");
+
+/* if (rte_eal_log_init(argv[0], internal_config.syslog_facility) < 0)
+ rte_panic("Cannot init logs\n");*/
+
+ if (rte_eal_alarm_init() < 0)
+ rte_panic("Cannot init interrupt-handling thread\n");
+
+ if (rte_eal_intr_init() < 0)
+ rte_panic("Cannot init interrupt-handling thread\n");
+
+ if (rte_eal_timer_init() < 0)
+ rte_panic("Cannot init HPET or TSC timers\n");
+
+ if (rte_eal_pci_init() < 0)
+ rte_panic("Cannot init PCI\n");
+
+ eal_check_mem_on_local_socket();
+
+ if (eal_plugins_init() < 0)
+ rte_panic("Cannot init plugins\n");
+
+ eal_thread_init_master(rte_config.master_lcore);
+
+ ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN);
+
+ RTE_LOG(DEBUG, EAL, "Master lcore %u is ready (tid=%p;cpuset=[%s%s])\n",
+ rte_config.master_lcore, thread_id, cpuset,
+ ret == 0 ? "" : "...");
+
+ if (rte_eal_dev_init() < 0)
+ rte_panic("Cannot init pmd devices\n");
+
+ RTE_LCORE_FOREACH_SLAVE(i) {
+
+ /*
+ * create communication pipes between master thread
+ * and children
+ */
+ if (pipe(lcore_config[i].pipe_master2slave) < 0)
+ rte_panic("Cannot create pipe\n");
+ if (pipe(lcore_config[i].pipe_slave2master) < 0)
+ rte_panic("Cannot create pipe\n");
+
+ lcore_config[i].state = WAIT;
+
+ /* create a thread for each lcore */
+ ret = pthread_create(&lcore_config[i].thread_id, NULL,
+ eal_thread_loop, NULL);
+ if (ret != 0)
+ rte_panic("Cannot create thread\n");
+
+ /* Set thread_name for aid in debugging. */
+ snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN,
+ "lcore-slave-%d", i);
+ pthread_set_name_np(lcore_config[i].thread_id, thread_name);
+ }
+
+ /*
+ * Launch a dummy function on all slave lcores, so that master lcore
+ * knows they are all ready when this function returns.
+ */
+ rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MASTER);
+ rte_eal_mp_wait_lcore();
+
+ /* Probe & Initialize PCI devices */
+ if (rte_eal_pci_probe())
+ rte_panic("Cannot probe PCI\n");
+
+ rte_eal_mcfg_complete();
+
+ return fctret;
+}
+
+/* get core role */
+enum rte_lcore_role_t
+rte_eal_lcore_role(unsigned lcore_id)
+{
+ return rte_config.lcore_role[lcore_id];
+}
+
+enum rte_proc_type_t
+rte_eal_process_type(void)
+{
+ return rte_config.process_type;
+}
diff --git a/lib/librte_eal/bsdapp/eal/eal_alarm.c b/lib/librte_eal/bsdapp/eal/eal_alarm.c
new file mode 100644
index 00000000..204df85d
--- /dev/null
+++ b/lib/librte_eal/bsdapp/eal/eal_alarm.c
@@ -0,0 +1,60 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <stdlib.h>
+#include <errno.h>
+
+#include <rte_alarm.h>
+#include <rte_common.h>
+#include "eal_private.h"
+
+int
+rte_eal_alarm_init(void)
+{
+ return 0;
+}
+
+
+int
+rte_eal_alarm_set(uint64_t us __rte_unused,
+ rte_eal_alarm_callback cb_fn __rte_unused,
+ void *cb_arg __rte_unused)
+{
+ return -ENOTSUP;
+}
+
+int
+rte_eal_alarm_cancel(rte_eal_alarm_callback cb_fn __rte_unused,
+ void *cb_arg __rte_unused)
+{
+ return -ENOTSUP;
+}
diff --git a/lib/librte_eal/bsdapp/eal/eal_debug.c b/lib/librte_eal/bsdapp/eal/eal_debug.c
new file mode 100644
index 00000000..907fbfa7
--- /dev/null
+++ b/lib/librte_eal/bsdapp/eal/eal_debug.c
@@ -0,0 +1,119 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <execinfo.h>
+#include <stdarg.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+
+#include <rte_log.h>
+#include <rte_debug.h>
+#include <rte_common.h>
+
+#define BACKTRACE_SIZE 256
+
+/* dump the stack of the calling core */
+void rte_dump_stack(void)
+{
+ void *func[BACKTRACE_SIZE];
+ char **symb = NULL;
+ int size;
+
+ size = backtrace(func, BACKTRACE_SIZE);
+ symb = backtrace_symbols(func, size);
+
+ if (symb == NULL)
+ return;
+
+ while (size > 0) {
+ rte_log(RTE_LOG_ERR, RTE_LOGTYPE_EAL,
+ "%d: [%s]\n", size, symb[size - 1]);
+ size --;
+ }
+
+ free(symb);
+}
+
+/* not implemented in this environment */
+void rte_dump_registers(void)
+{
+ return;
+}
+
+/* call abort(), it will generate a coredump if enabled */
+void __rte_panic(const char *funcname, const char *format, ...)
+{
+ va_list ap;
+
+ /* disable history */
+ rte_log_set_history(0);
+
+ rte_log(RTE_LOG_CRIT, RTE_LOGTYPE_EAL, "PANIC in %s():\n", funcname);
+ va_start(ap, format);
+ rte_vlog(RTE_LOG_CRIT, RTE_LOGTYPE_EAL, format, ap);
+ va_end(ap);
+ rte_dump_stack();
+ rte_dump_registers();
+ abort();
+}
+
+/*
+ * Like rte_panic this terminates the application. However, no traceback is
+ * provided and no core-dump is generated.
+ */
+void
+rte_exit(int exit_code, const char *format, ...)
+{
+ va_list ap;
+
+ /* disable history */
+ rte_log_set_history(0);
+
+ if (exit_code != 0)
+ RTE_LOG(CRIT, EAL, "Error - exiting with code: %d\n"
+ " Cause: ", exit_code);
+
+ va_start(ap, format);
+ rte_vlog(RTE_LOG_CRIT, RTE_LOGTYPE_EAL, format, ap);
+ va_end(ap);
+
+#ifndef RTE_EAL_ALWAYS_PANIC_ON_ERROR
+ exit(exit_code);
+#else
+ rte_dump_stack();
+ rte_dump_registers();
+ abort();
+#endif
+}
diff --git a/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c b/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c
new file mode 100644
index 00000000..8a33c30c
--- /dev/null
+++ b/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c
@@ -0,0 +1,134 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#include <sys/mman.h>
+#include <string.h>
+
+#include <rte_log.h>
+#include <fcntl.h>
+#include "eal_hugepages.h"
+#include "eal_internal_cfg.h"
+#include "eal_filesystem.h"
+
+#define CONTIGMEM_DEV "/dev/contigmem"
+
+/*
+ * Uses mmap to create a shared memory area for storage of data
+ * Used in this file to store the hugepage file map on disk
+ */
+static void *
+create_shared_memory(const char *filename, const size_t mem_size)
+{
+ void *retval;
+ int fd = open(filename, O_CREAT | O_RDWR, 0666);
+ if (fd < 0)
+ return NULL;
+ if (ftruncate(fd, mem_size) < 0) {
+ close(fd);
+ return NULL;
+ }
+ retval = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ close(fd);
+ return retval;
+}
+
+/*
+ * No hugepage support on freebsd, but we dummy it, using contigmem driver
+ */
+int
+eal_hugepage_info_init(void)
+{
+ size_t sysctl_size;
+ int num_buffers, fd, error;
+ int64_t buffer_size;
+ /* re-use the linux "internal config" structure for our memory data */
+ struct hugepage_info *hpi = &internal_config.hugepage_info[0];
+ struct hugepage_info *tmp_hpi;
+
+ sysctl_size = sizeof(num_buffers);
+ error = sysctlbyname("hw.contigmem.num_buffers", &num_buffers,
+ &sysctl_size, NULL, 0);
+
+ if (error != 0) {
+ RTE_LOG(ERR, EAL, "could not read sysctl hw.contigmem.num_buffers");
+ return -1;
+ }
+
+ sysctl_size = sizeof(buffer_size);
+ error = sysctlbyname("hw.contigmem.buffer_size", &buffer_size,
+ &sysctl_size, NULL, 0);
+
+ if (error != 0) {
+ RTE_LOG(ERR, EAL, "could not read sysctl hw.contigmem.buffer_size");
+ return -1;
+ }
+
+ fd = open(CONTIGMEM_DEV, O_RDWR);
+ if (fd < 0) {
+ RTE_LOG(ERR, EAL, "could not open "CONTIGMEM_DEV"\n");
+ return -1;
+ }
+
+ if (buffer_size >= 1<<30)
+ RTE_LOG(INFO, EAL, "Contigmem driver has %d buffers, each of size %dGB\n",
+ num_buffers, (int)(buffer_size>>30));
+ else if (buffer_size >= 1<<20)
+ RTE_LOG(INFO, EAL, "Contigmem driver has %d buffers, each of size %dMB\n",
+ num_buffers, (int)(buffer_size>>20));
+ else
+ RTE_LOG(INFO, EAL, "Contigmem driver has %d buffers, each of size %dKB\n",
+ num_buffers, (int)(buffer_size>>10));
+
+ internal_config.num_hugepage_sizes = 1;
+ hpi->hugedir = CONTIGMEM_DEV;
+ hpi->hugepage_sz = buffer_size;
+ hpi->num_pages[0] = num_buffers;
+ hpi->lock_descriptor = fd;
+
+ tmp_hpi = create_shared_memory(eal_hugepage_info_path(),
+ sizeof(struct hugepage_info));
+ if (tmp_hpi == NULL ) {
+ RTE_LOG(ERR, EAL, "Failed to create shared memory!\n");
+ return -1;
+ }
+
+ memcpy(tmp_hpi, hpi, sizeof(struct hugepage_info));
+
+ if ( munmap(tmp_hpi, sizeof(struct hugepage_info)) < 0) {
+ RTE_LOG(ERR, EAL, "Failed to unmap shared memory!\n");
+ return -1;
+ }
+
+ return 0;
+}
diff --git a/lib/librte_eal/bsdapp/eal/eal_interrupts.c b/lib/librte_eal/bsdapp/eal/eal_interrupts.c
new file mode 100644
index 00000000..836e4836
--- /dev/null
+++ b/lib/librte_eal/bsdapp/eal/eal_interrupts.c
@@ -0,0 +1,119 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_common.h>
+#include <rte_interrupts.h>
+#include "eal_private.h"
+
+int
+rte_intr_callback_register(struct rte_intr_handle *intr_handle __rte_unused,
+ rte_intr_callback_fn cb __rte_unused,
+ void *cb_arg __rte_unused)
+{
+ return -ENOTSUP;
+}
+
+int
+rte_intr_callback_unregister(struct rte_intr_handle *intr_handle __rte_unused,
+ rte_intr_callback_fn cb_fn __rte_unused,
+ void *cb_arg __rte_unused)
+{
+ return -ENOTSUP;
+}
+
+int
+rte_intr_enable(struct rte_intr_handle *intr_handle __rte_unused)
+{
+ return -ENOTSUP;
+}
+
+int
+rte_intr_disable(struct rte_intr_handle *intr_handle __rte_unused)
+{
+ return -ENOTSUP;
+}
+
+int
+rte_eal_intr_init(void)
+{
+ return 0;
+}
+
+int
+rte_intr_rx_ctl(struct rte_intr_handle *intr_handle,
+ int epfd, int op, unsigned int vec, void *data)
+{
+ RTE_SET_USED(intr_handle);
+ RTE_SET_USED(epfd);
+ RTE_SET_USED(op);
+ RTE_SET_USED(vec);
+ RTE_SET_USED(data);
+
+ return -ENOTSUP;
+}
+
+int
+rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd)
+{
+ RTE_SET_USED(intr_handle);
+ RTE_SET_USED(nb_efd);
+
+ return 0;
+}
+
+void
+rte_intr_efd_disable(struct rte_intr_handle *intr_handle)
+{
+ RTE_SET_USED(intr_handle);
+}
+
+int
+rte_intr_dp_is_en(struct rte_intr_handle *intr_handle)
+{
+ RTE_SET_USED(intr_handle);
+ return 0;
+}
+
+int
+rte_intr_allow_others(struct rte_intr_handle *intr_handle)
+{
+ RTE_SET_USED(intr_handle);
+ return 1;
+}
+
+int
+rte_intr_cap_multiple(struct rte_intr_handle *intr_handle)
+{
+ RTE_SET_USED(intr_handle);
+ return 0;
+}
diff --git a/lib/librte_eal/bsdapp/eal/eal_lcore.c b/lib/librte_eal/bsdapp/eal/eal_lcore.c
new file mode 100644
index 00000000..b8bfafde
--- /dev/null
+++ b/lib/librte_eal/bsdapp/eal/eal_lcore.c
@@ -0,0 +1,79 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <unistd.h>
+#include <sys/sysctl.h>
+
+#include <rte_log.h>
+#include <rte_eal.h>
+#include <rte_lcore.h>
+#include <rte_common.h>
+#include <rte_debug.h>
+
+#include "eal_private.h"
+#include "eal_thread.h"
+
+/* No topology information available on FreeBSD including NUMA info */
+unsigned
+eal_cpu_core_id(__rte_unused unsigned lcore_id)
+{
+ return 0;
+}
+
+static int
+eal_get_ncpus(void)
+{
+ int mib[2] = {CTL_HW, HW_NCPU};
+ int ncpu;
+ size_t len = sizeof(ncpu);
+
+ sysctl(mib, 2, &ncpu, &len, NULL, 0);
+ RTE_LOG(INFO, EAL, "Sysctl reports %d cpus\n", ncpu);
+ return ncpu;
+}
+
+unsigned
+eal_cpu_socket_id(__rte_unused unsigned cpu_id)
+{
+ return 0;
+}
+
+/* Check if a cpu is present by the presence of the
+ * cpu information for it.
+ */
+int
+eal_cpu_detected(unsigned lcore_id)
+{
+ const unsigned ncpus = eal_get_ncpus();
+ return lcore_id < ncpus;
+}
diff --git a/lib/librte_eal/bsdapp/eal/eal_log.c b/lib/librte_eal/bsdapp/eal/eal_log.c
new file mode 100644
index 00000000..a425f7a8
--- /dev/null
+++ b/lib/librte_eal/bsdapp/eal/eal_log.c
@@ -0,0 +1,57 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <rte_common.h>
+#include <rte_log.h>
+
+#include <eal_private.h>
+
+/*
+ * set the log to default function, called during eal init process,
+ * once memzones are available.
+ */
+int
+rte_eal_log_init(const char *id __rte_unused, int facility __rte_unused)
+{
+ if (rte_eal_common_log_init(stderr) < 0)
+ return -1;
+ return 0;
+}
+
+int
+rte_eal_log_early_init(void)
+{
+ rte_openlog_stream(stderr);
+ return 0;
+}
diff --git a/lib/librte_eal/bsdapp/eal/eal_memory.c b/lib/librte_eal/bsdapp/eal/eal_memory.c
new file mode 100644
index 00000000..3614da8d
--- /dev/null
+++ b/lib/librte_eal/bsdapp/eal/eal_memory.c
@@ -0,0 +1,194 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <sys/mman.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#include <inttypes.h>
+#include <fcntl.h>
+
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_log.h>
+#include <rte_string_fns.h>
+#include "eal_private.h"
+#include "eal_internal_cfg.h"
+#include "eal_filesystem.h"
+
+#define EAL_PAGE_SIZE (sysconf(_SC_PAGESIZE))
+
+/*
+ * Get physical address of any mapped virtual address in the current process.
+ */
+phys_addr_t
+rte_mem_virt2phy(const void *virtaddr)
+{
+ /* XXX not implemented. This function is only used by
+ * rte_mempool_virt2phy() when hugepages are disabled. */
+ (void)virtaddr;
+ return RTE_BAD_PHYS_ADDR;
+}
+
+int
+rte_eal_hugepage_init(void)
+{
+ struct rte_mem_config *mcfg;
+ uint64_t total_mem = 0;
+ void *addr;
+ unsigned i, j, seg_idx = 0;
+
+ /* get pointer to global configuration */
+ mcfg = rte_eal_get_configuration()->mem_config;
+
+ /* for debug purposes, hugetlbfs can be disabled */
+ if (internal_config.no_hugetlbfs) {
+ addr = malloc(internal_config.memory);
+ mcfg->memseg[0].phys_addr = (phys_addr_t)(uintptr_t)addr;
+ mcfg->memseg[0].addr = addr;
+ mcfg->memseg[0].hugepage_sz = RTE_PGSIZE_4K;
+ mcfg->memseg[0].len = internal_config.memory;
+ mcfg->memseg[0].socket_id = 0;
+ return 0;
+ }
+
+ /* map all hugepages and sort them */
+ for (i = 0; i < internal_config.num_hugepage_sizes; i ++){
+ struct hugepage_info *hpi;
+
+ hpi = &internal_config.hugepage_info[i];
+ for (j = 0; j < hpi->num_pages[0]; j++) {
+ struct rte_memseg *seg;
+ uint64_t physaddr;
+ int error;
+ size_t sysctl_size = sizeof(physaddr);
+ char physaddr_str[64];
+
+ addr = mmap(NULL, hpi->hugepage_sz, PROT_READ|PROT_WRITE,
+ MAP_SHARED, hpi->lock_descriptor,
+ j * EAL_PAGE_SIZE);
+ if (addr == MAP_FAILED) {
+ RTE_LOG(ERR, EAL, "Failed to mmap buffer %u from %s\n",
+ j, hpi->hugedir);
+ return -1;
+ }
+
+ snprintf(physaddr_str, sizeof(physaddr_str), "hw.contigmem"
+ ".physaddr.%d", j);
+ error = sysctlbyname(physaddr_str, &physaddr, &sysctl_size,
+ NULL, 0);
+ if (error < 0) {
+ RTE_LOG(ERR, EAL, "Failed to get physical addr for buffer %u "
+ "from %s\n", j, hpi->hugedir);
+ return -1;
+ }
+
+ seg = &mcfg->memseg[seg_idx++];
+ seg->addr = addr;
+ seg->phys_addr = physaddr;
+ seg->hugepage_sz = hpi->hugepage_sz;
+ seg->len = hpi->hugepage_sz;
+ seg->nchannel = mcfg->nchannel;
+ seg->nrank = mcfg->nrank;
+ seg->socket_id = 0;
+
+ RTE_LOG(INFO, EAL, "Mapped memory segment %u @ %p: physaddr:0x%"
+ PRIx64", len %zu\n",
+ seg_idx, addr, physaddr, hpi->hugepage_sz);
+ if (total_mem >= internal_config.memory ||
+ seg_idx >= RTE_MAX_MEMSEG)
+ break;
+ }
+ }
+ return 0;
+}
+
+int
+rte_eal_hugepage_attach(void)
+{
+ const struct hugepage_info *hpi;
+ int fd_hugepage_info, fd_hugepage = -1;
+ unsigned i = 0;
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+
+ /* Obtain a file descriptor for hugepage_info */
+ fd_hugepage_info = open(eal_hugepage_info_path(), O_RDONLY);
+ if (fd_hugepage_info < 0) {
+ RTE_LOG(ERR, EAL, "Could not open %s\n", eal_hugepage_info_path());
+ return -1;
+ }
+
+ /* Map the shared hugepage_info into the process address spaces */
+ hpi = mmap(NULL, sizeof(struct hugepage_info), PROT_READ, MAP_PRIVATE,
+ fd_hugepage_info, 0);
+ if (hpi == NULL) {
+ RTE_LOG(ERR, EAL, "Could not mmap %s\n", eal_hugepage_info_path());
+ goto error;
+ }
+
+ /* Obtain a file descriptor for contiguous memory */
+ fd_hugepage = open(hpi->hugedir, O_RDWR);
+ if (fd_hugepage < 0) {
+ RTE_LOG(ERR, EAL, "Could not open %s\n", hpi->hugedir);
+ goto error;
+ }
+
+ /* Map the contiguous memory into each memory segment */
+ for (i = 0; i < hpi->num_pages[0]; i++) {
+
+ void *addr;
+ struct rte_memseg *seg = &mcfg->memseg[i];
+
+ addr = mmap(seg->addr, hpi->hugepage_sz, PROT_READ|PROT_WRITE,
+ MAP_SHARED|MAP_FIXED, fd_hugepage,
+ i * EAL_PAGE_SIZE);
+ if (addr == MAP_FAILED || addr != seg->addr) {
+ RTE_LOG(ERR, EAL, "Failed to mmap buffer %u from %s\n",
+ i, hpi->hugedir);
+ goto error;
+ }
+
+ }
+
+ /* hugepage_info is no longer required */
+ munmap((void *)(uintptr_t)hpi, sizeof(struct hugepage_info));
+ close(fd_hugepage_info);
+ close(fd_hugepage);
+ return 0;
+
+error:
+ if (fd_hugepage_info >= 0)
+ close(fd_hugepage_info);
+ if (fd_hugepage >= 0)
+ close(fd_hugepage);
+ return -1;
+}
diff --git a/lib/librte_eal/bsdapp/eal/eal_pci.c b/lib/librte_eal/bsdapp/eal/eal_pci.c
new file mode 100644
index 00000000..2d16d782
--- /dev/null
+++ b/lib/librte_eal/bsdapp/eal/eal_pci.c
@@ -0,0 +1,633 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <dirent.h>
+#include <limits.h>
+#include <sys/queue.h>
+#include <sys/mman.h>
+#include <sys/ioctl.h>
+#include <sys/pciio.h>
+#include <dev/pci/pcireg.h>
+
+#if defined(RTE_ARCH_X86)
+#include <sys/types.h>
+#include <machine/cpufunc.h>
+#endif
+
+#include <rte_interrupts.h>
+#include <rte_log.h>
+#include <rte_pci.h>
+#include <rte_common.h>
+#include <rte_launch.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_malloc.h>
+#include <rte_string_fns.h>
+#include <rte_debug.h>
+#include <rte_devargs.h>
+
+#include "eal_filesystem.h"
+#include "eal_private.h"
+
+/**
+ * @file
+ * PCI probing under linux
+ *
+ * This code is used to simulate a PCI probe by parsing information in
+ * sysfs. Moreover, when a registered driver matches a device, the
+ * kernel driver currently using it is unloaded and replaced by
+ * igb_uio module, which is a very minimal userland driver for Intel
+ * network card, only providing access to PCI BAR to applications, and
+ * enabling bus master.
+ */
+
+/* unbind kernel driver for this device */
+int
+pci_unbind_kernel_driver(struct rte_pci_device *dev __rte_unused)
+{
+ RTE_LOG(ERR, EAL, "RTE_PCI_DRV_FORCE_UNBIND flag is not implemented "
+ "for BSD\n");
+ return -ENOTSUP;
+}
+
+/* Map pci device */
+int
+rte_eal_pci_map_device(struct rte_pci_device *dev)
+{
+ int ret = -1;
+
+ /* try mapping the NIC resources */
+ switch (dev->kdrv) {
+ case RTE_KDRV_NIC_UIO:
+ /* map resources for devices that use uio */
+ ret = pci_uio_map_resource(dev);
+ break;
+ default:
+ RTE_LOG(DEBUG, EAL,
+ " Not managed by a supported kernel driver, skipped\n");
+ ret = 1;
+ break;
+ }
+
+ return ret;
+}
+
+/* Unmap pci device */
+void
+rte_eal_pci_unmap_device(struct rte_pci_device *dev)
+{
+ /* try unmapping the NIC resources */
+ switch (dev->kdrv) {
+ case RTE_KDRV_NIC_UIO:
+ /* unmap resources for devices that use uio */
+ pci_uio_unmap_resource(dev);
+ break;
+ default:
+ RTE_LOG(DEBUG, EAL,
+ " Not managed by a supported kernel driver, skipped\n");
+ break;
+ }
+}
+
+void
+pci_uio_free_resource(struct rte_pci_device *dev,
+ struct mapped_pci_resource *uio_res)
+{
+ rte_free(uio_res);
+
+ if (dev->intr_handle.fd) {
+ close(dev->intr_handle.fd);
+ dev->intr_handle.fd = -1;
+ dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
+ }
+}
+
+int
+pci_uio_alloc_resource(struct rte_pci_device *dev,
+ struct mapped_pci_resource **uio_res)
+{
+ char devname[PATH_MAX]; /* contains the /dev/uioX */
+ struct rte_pci_addr *loc;
+
+ loc = &dev->addr;
+
+ snprintf(devname, sizeof(devname), "/dev/uio@pci:%u:%u:%u",
+ dev->addr.bus, dev->addr.devid, dev->addr.function);
+
+ if (access(devname, O_RDWR) < 0) {
+ RTE_LOG(WARNING, EAL, " "PCI_PRI_FMT" not managed by UIO driver, "
+ "skipping\n", loc->domain, loc->bus, loc->devid, loc->function);
+ return 1;
+ }
+
+ /* save fd if in primary process */
+ dev->intr_handle.fd = open(devname, O_RDWR);
+ if (dev->intr_handle.fd < 0) {
+ RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
+ devname, strerror(errno));
+ goto error;
+ }
+ dev->intr_handle.type = RTE_INTR_HANDLE_UIO;
+
+ /* allocate the mapping details for secondary processes*/
+ *uio_res = rte_zmalloc("UIO_RES", sizeof(**uio_res), 0);
+ if (*uio_res == NULL) {
+ RTE_LOG(ERR, EAL,
+ "%s(): cannot store uio mmap details\n", __func__);
+ goto error;
+ }
+
+ snprintf((*uio_res)->path, sizeof((*uio_res)->path), "%s", devname);
+ memcpy(&(*uio_res)->pci_addr, &dev->addr, sizeof((*uio_res)->pci_addr));
+
+ return 0;
+
+error:
+ pci_uio_free_resource(dev, *uio_res);
+ return -1;
+}
+
+int
+pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx,
+ struct mapped_pci_resource *uio_res, int map_idx)
+{
+ int fd;
+ char *devname;
+ void *mapaddr;
+ uint64_t offset;
+ uint64_t pagesz;
+ struct pci_map *maps;
+
+ maps = uio_res->maps;
+ devname = uio_res->path;
+ pagesz = sysconf(_SC_PAGESIZE);
+
+ /* allocate memory to keep path */
+ maps[map_idx].path = rte_malloc(NULL, strlen(devname) + 1, 0);
+ if (maps[map_idx].path == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot allocate memory for path: %s\n",
+ strerror(errno));
+ return -1;
+ }
+
+ /*
+ * open resource file, to mmap it
+ */
+ fd = open(devname, O_RDWR);
+ if (fd < 0) {
+ RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
+ devname, strerror(errno));
+ goto error;
+ }
+
+ /* if matching map is found, then use it */
+ offset = res_idx * pagesz;
+ mapaddr = pci_map_resource(NULL, fd, (off_t)offset,
+ (size_t)dev->mem_resource[res_idx].len, 0);
+ close(fd);
+ if (mapaddr == MAP_FAILED)
+ goto error;
+
+ maps[map_idx].phaddr = dev->mem_resource[res_idx].phys_addr;
+ maps[map_idx].size = dev->mem_resource[res_idx].len;
+ maps[map_idx].addr = mapaddr;
+ maps[map_idx].offset = offset;
+ strcpy(maps[map_idx].path, devname);
+ dev->mem_resource[res_idx].addr = mapaddr;
+
+ return 0;
+
+error:
+ rte_free(maps[map_idx].path);
+ return -1;
+}
+
+static int
+pci_scan_one(int dev_pci_fd, struct pci_conf *conf)
+{
+ struct rte_pci_device *dev;
+ struct pci_bar_io bar;
+ unsigned i, max;
+
+ dev = malloc(sizeof(*dev));
+ if (dev == NULL) {
+ return -1;
+ }
+
+ memset(dev, 0, sizeof(*dev));
+ dev->addr.domain = conf->pc_sel.pc_domain;
+ dev->addr.bus = conf->pc_sel.pc_bus;
+ dev->addr.devid = conf->pc_sel.pc_dev;
+ dev->addr.function = conf->pc_sel.pc_func;
+
+ /* get vendor id */
+ dev->id.vendor_id = conf->pc_vendor;
+
+ /* get device id */
+ dev->id.device_id = conf->pc_device;
+
+ /* get subsystem_vendor id */
+ dev->id.subsystem_vendor_id = conf->pc_subvendor;
+
+ /* get subsystem_device id */
+ dev->id.subsystem_device_id = conf->pc_subdevice;
+
+ /* TODO: get max_vfs */
+ dev->max_vfs = 0;
+
+ /* FreeBSD has no NUMA support (yet) */
+ dev->numa_node = 0;
+
+ /* FreeBSD has only one pass through driver */
+ dev->kdrv = RTE_KDRV_NIC_UIO;
+
+ /* parse resources */
+ switch (conf->pc_hdr & PCIM_HDRTYPE) {
+ case PCIM_HDRTYPE_NORMAL:
+ max = PCIR_MAX_BAR_0;
+ break;
+ case PCIM_HDRTYPE_BRIDGE:
+ max = PCIR_MAX_BAR_1;
+ break;
+ case PCIM_HDRTYPE_CARDBUS:
+ max = PCIR_MAX_BAR_2;
+ break;
+ default:
+ goto skipdev;
+ }
+
+ for (i = 0; i <= max; i++) {
+ bar.pbi_sel = conf->pc_sel;
+ bar.pbi_reg = PCIR_BAR(i);
+ if (ioctl(dev_pci_fd, PCIOCGETBAR, &bar) < 0)
+ continue;
+
+ dev->mem_resource[i].len = bar.pbi_length;
+ if (PCI_BAR_IO(bar.pbi_base)) {
+ dev->mem_resource[i].addr = (void *)(bar.pbi_base & ~((uint64_t)0xf));
+ continue;
+ }
+ dev->mem_resource[i].phys_addr = bar.pbi_base & ~((uint64_t)0xf);
+ }
+
+ /* device is valid, add in list (sorted) */
+ if (TAILQ_EMPTY(&pci_device_list)) {
+ TAILQ_INSERT_TAIL(&pci_device_list, dev, next);
+ }
+ else {
+ struct rte_pci_device *dev2 = NULL;
+ int ret;
+
+ TAILQ_FOREACH(dev2, &pci_device_list, next) {
+ ret = rte_eal_compare_pci_addr(&dev->addr, &dev2->addr);
+ if (ret > 0)
+ continue;
+ else if (ret < 0) {
+ TAILQ_INSERT_BEFORE(dev2, dev, next);
+ return 0;
+ } else { /* already registered */
+ dev2->kdrv = dev->kdrv;
+ dev2->max_vfs = dev->max_vfs;
+ memmove(dev2->mem_resource,
+ dev->mem_resource,
+ sizeof(dev->mem_resource));
+ free(dev);
+ return 0;
+ }
+ }
+ TAILQ_INSERT_TAIL(&pci_device_list, dev, next);
+ }
+
+ return 0;
+
+skipdev:
+ free(dev);
+ return 0;
+}
+
+/*
+ * Scan the content of the PCI bus, and add the devices in the devices
+ * list. Call pci_scan_one() for each pci entry found.
+ */
+int
+rte_eal_pci_scan(void)
+{
+ int fd;
+ unsigned dev_count = 0;
+ struct pci_conf matches[16];
+ struct pci_conf_io conf_io = {
+ .pat_buf_len = 0,
+ .num_patterns = 0,
+ .patterns = NULL,
+ .match_buf_len = sizeof(matches),
+ .matches = &matches[0],
+ };
+
+ fd = open("/dev/pci", O_RDONLY);
+ if (fd < 0) {
+ RTE_LOG(ERR, EAL, "%s(): error opening /dev/pci\n", __func__);
+ goto error;
+ }
+
+ do {
+ unsigned i;
+ if (ioctl(fd, PCIOCGETCONF, &conf_io) < 0) {
+ RTE_LOG(ERR, EAL, "%s(): error with ioctl on /dev/pci: %s\n",
+ __func__, strerror(errno));
+ goto error;
+ }
+
+ for (i = 0; i < conf_io.num_matches; i++)
+ if (pci_scan_one(fd, &matches[i]) < 0)
+ goto error;
+
+ dev_count += conf_io.num_matches;
+ } while(conf_io.status == PCI_GETCONF_MORE_DEVS);
+
+ close(fd);
+
+ RTE_LOG(ERR, EAL, "PCI scan found %u devices\n", dev_count);
+ return 0;
+
+error:
+ if (fd >= 0)
+ close(fd);
+ return -1;
+}
+
+/* Read PCI config space. */
+int rte_eal_pci_read_config(const struct rte_pci_device *dev,
+ void *buf, size_t len, off_t offset)
+{
+ int fd = -1;
+ struct pci_io pi = {
+ .pi_sel = {
+ .pc_domain = dev->addr.domain,
+ .pc_bus = dev->addr.bus,
+ .pc_dev = dev->addr.devid,
+ .pc_func = dev->addr.function,
+ },
+ .pi_reg = offset,
+ .pi_width = len,
+ };
+
+ if (len == 3 || len > sizeof(pi.pi_data)) {
+ RTE_LOG(ERR, EAL, "%s(): invalid pci read length\n", __func__);
+ goto error;
+ }
+
+ fd = open("/dev/pci", O_RDONLY);
+ if (fd < 0) {
+ RTE_LOG(ERR, EAL, "%s(): error opening /dev/pci\n", __func__);
+ goto error;
+ }
+
+ if (ioctl(fd, PCIOCREAD, &pi) < 0)
+ goto error;
+ close(fd);
+
+ memcpy(buf, &pi.pi_data, len);
+ return 0;
+
+ error:
+ if (fd >= 0)
+ close(fd);
+ return -1;
+}
+
+/* Write PCI config space. */
+int rte_eal_pci_write_config(const struct rte_pci_device *dev,
+ const void *buf, size_t len, off_t offset)
+{
+ int fd = -1;
+
+ struct pci_io pi = {
+ .pi_sel = {
+ .pc_domain = dev->addr.domain,
+ .pc_bus = dev->addr.bus,
+ .pc_dev = dev->addr.devid,
+ .pc_func = dev->addr.function,
+ },
+ .pi_reg = offset,
+ .pi_data = *(const uint32_t *)buf,
+ .pi_width = len,
+ };
+
+ if (len == 3 || len > sizeof(pi.pi_data)) {
+ RTE_LOG(ERR, EAL, "%s(): invalid pci read length\n", __func__);
+ goto error;
+ }
+
+ memcpy(&pi.pi_data, buf, len);
+
+ fd = open("/dev/pci", O_RDONLY);
+ if (fd < 0) {
+ RTE_LOG(ERR, EAL, "%s(): error opening /dev/pci\n", __func__);
+ goto error;
+ }
+
+ if (ioctl(fd, PCIOCWRITE, &pi) < 0)
+ goto error;
+
+ close(fd);
+ return 0;
+
+ error:
+ if (fd >= 0)
+ close(fd);
+ return -1;
+}
+
+int
+rte_eal_pci_ioport_map(struct rte_pci_device *dev, int bar,
+ struct rte_pci_ioport *p)
+{
+ int ret;
+
+ switch (dev->kdrv) {
+#if defined(RTE_ARCH_X86)
+ case RTE_KDRV_NIC_UIO:
+ if ((uintptr_t) dev->mem_resource[bar].addr <= UINT16_MAX) {
+ p->base = (uintptr_t)dev->mem_resource[bar].addr;
+ ret = 0;
+ } else
+ ret = -1;
+ break;
+#endif
+ default:
+ ret = -1;
+ break;
+ }
+
+ if (!ret)
+ p->dev = dev;
+
+ return ret;
+}
+
+static void
+pci_uio_ioport_read(struct rte_pci_ioport *p,
+ void *data, size_t len, off_t offset)
+{
+#if defined(RTE_ARCH_X86)
+ uint8_t *d;
+ int size;
+ unsigned short reg = p->base + offset;
+
+ for (d = data; len > 0; d += size, reg += size, len -= size) {
+ if (len >= 4) {
+ size = 4;
+ *(uint32_t *)d = inl(reg);
+ } else if (len >= 2) {
+ size = 2;
+ *(uint16_t *)d = inw(reg);
+ } else {
+ size = 1;
+ *d = inb(reg);
+ }
+ }
+#else
+ RTE_SET_USED(p);
+ RTE_SET_USED(data);
+ RTE_SET_USED(len);
+ RTE_SET_USED(offset);
+#endif
+}
+
+void
+rte_eal_pci_ioport_read(struct rte_pci_ioport *p,
+ void *data, size_t len, off_t offset)
+{
+ switch (p->dev->kdrv) {
+ case RTE_KDRV_NIC_UIO:
+ pci_uio_ioport_read(p, data, len, offset);
+ break;
+ default:
+ break;
+ }
+}
+
+static void
+pci_uio_ioport_write(struct rte_pci_ioport *p,
+ const void *data, size_t len, off_t offset)
+{
+#if defined(RTE_ARCH_X86)
+ const uint8_t *s;
+ int size;
+ unsigned short reg = p->base + offset;
+
+ for (s = data; len > 0; s += size, reg += size, len -= size) {
+ if (len >= 4) {
+ size = 4;
+ outl(*(const uint32_t *)s, reg);
+ } else if (len >= 2) {
+ size = 2;
+ outw(*(const uint16_t *)s, reg);
+ } else {
+ size = 1;
+ outb(*s, reg);
+ }
+ }
+#else
+ RTE_SET_USED(p);
+ RTE_SET_USED(data);
+ RTE_SET_USED(len);
+ RTE_SET_USED(offset);
+#endif
+}
+
+void
+rte_eal_pci_ioport_write(struct rte_pci_ioport *p,
+ const void *data, size_t len, off_t offset)
+{
+ switch (p->dev->kdrv) {
+ case RTE_KDRV_NIC_UIO:
+ pci_uio_ioport_write(p, data, len, offset);
+ break;
+ default:
+ break;
+ }
+}
+
+int
+rte_eal_pci_ioport_unmap(struct rte_pci_ioport *p)
+{
+ int ret;
+
+ switch (p->dev->kdrv) {
+#if defined(RTE_ARCH_X86)
+ case RTE_KDRV_NIC_UIO:
+ ret = 0;
+ break;
+#endif
+ default:
+ ret = -1;
+ break;
+ }
+
+ return ret;
+}
+
+/* Init the PCI EAL subsystem */
+int
+rte_eal_pci_init(void)
+{
+ TAILQ_INIT(&pci_driver_list);
+ TAILQ_INIT(&pci_device_list);
+
+ /* for debug purposes, PCI can be disabled */
+ if (internal_config.no_pci)
+ return 0;
+
+ if (rte_eal_pci_scan() < 0) {
+ RTE_LOG(ERR, EAL, "%s(): Cannot scan PCI bus\n", __func__);
+ return -1;
+ }
+ return 0;
+}
diff --git a/lib/librte_eal/bsdapp/eal/eal_thread.c b/lib/librte_eal/bsdapp/eal/eal_thread.c
new file mode 100644
index 00000000..9a034373
--- /dev/null
+++ b/lib/librte_eal/bsdapp/eal/eal_thread.c
@@ -0,0 +1,201 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <sched.h>
+#include <pthread_np.h>
+#include <sys/queue.h>
+#include <sys/thr.h>
+
+#include <rte_debug.h>
+#include <rte_atomic.h>
+#include <rte_launch.h>
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_per_lcore.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+
+#include "eal_private.h"
+#include "eal_thread.h"
+
+RTE_DEFINE_PER_LCORE(unsigned, _lcore_id) = LCORE_ID_ANY;
+RTE_DEFINE_PER_LCORE(unsigned, _socket_id) = (unsigned)SOCKET_ID_ANY;
+RTE_DEFINE_PER_LCORE(rte_cpuset_t, _cpuset);
+
+/*
+ * Send a message to a slave lcore identified by slave_id to call a
+ * function f with argument arg. Once the execution is done, the
+ * remote lcore switch in FINISHED state.
+ */
+int
+rte_eal_remote_launch(int (*f)(void *), void *arg, unsigned slave_id)
+{
+ int n;
+ char c = 0;
+ int m2s = lcore_config[slave_id].pipe_master2slave[1];
+ int s2m = lcore_config[slave_id].pipe_slave2master[0];
+
+ if (lcore_config[slave_id].state != WAIT)
+ return -EBUSY;
+
+ lcore_config[slave_id].f = f;
+ lcore_config[slave_id].arg = arg;
+
+ /* send message */
+ n = 0;
+ while (n == 0 || (n < 0 && errno == EINTR))
+ n = write(m2s, &c, 1);
+ if (n < 0)
+ rte_panic("cannot write on configuration pipe\n");
+
+ /* wait ack */
+ do {
+ n = read(s2m, &c, 1);
+ } while (n < 0 && errno == EINTR);
+
+ if (n <= 0)
+ rte_panic("cannot read on configuration pipe\n");
+
+ return 0;
+}
+
+/* set affinity for current thread */
+static int
+eal_thread_set_affinity(void)
+{
+ unsigned lcore_id = rte_lcore_id();
+
+ /* acquire system unique id */
+ rte_gettid();
+
+ /* update EAL thread core affinity */
+ return rte_thread_set_affinity(&lcore_config[lcore_id].cpuset);
+}
+
+void eal_thread_init_master(unsigned lcore_id)
+{
+ /* set the lcore ID in per-lcore memory area */
+ RTE_PER_LCORE(_lcore_id) = lcore_id;
+
+ /* set CPU affinity */
+ if (eal_thread_set_affinity() < 0)
+ rte_panic("cannot set affinity\n");
+}
+
+/* main loop of threads */
+__attribute__((noreturn)) void *
+eal_thread_loop(__attribute__((unused)) void *arg)
+{
+ char c;
+ int n, ret;
+ unsigned lcore_id;
+ pthread_t thread_id;
+ int m2s, s2m;
+ char cpuset[RTE_CPU_AFFINITY_STR_LEN];
+
+ thread_id = pthread_self();
+
+ /* retrieve our lcore_id from the configuration structure */
+ RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+ if (thread_id == lcore_config[lcore_id].thread_id)
+ break;
+ }
+ if (lcore_id == RTE_MAX_LCORE)
+ rte_panic("cannot retrieve lcore id\n");
+
+ m2s = lcore_config[lcore_id].pipe_master2slave[0];
+ s2m = lcore_config[lcore_id].pipe_slave2master[1];
+
+ /* set the lcore ID in per-lcore memory area */
+ RTE_PER_LCORE(_lcore_id) = lcore_id;
+
+ /* set CPU affinity */
+ if (eal_thread_set_affinity() < 0)
+ rte_panic("cannot set affinity\n");
+
+ ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN);
+
+ RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%p;cpuset=[%s%s])\n",
+ lcore_id, thread_id, cpuset, ret == 0 ? "" : "...");
+
+ /* read on our pipe to get commands */
+ while (1) {
+ void *fct_arg;
+
+ /* wait command */
+ do {
+ n = read(m2s, &c, 1);
+ } while (n < 0 && errno == EINTR);
+
+ if (n <= 0)
+ rte_panic("cannot read on configuration pipe\n");
+
+ lcore_config[lcore_id].state = RUNNING;
+
+ /* send ack */
+ n = 0;
+ while (n == 0 || (n < 0 && errno == EINTR))
+ n = write(s2m, &c, 1);
+ if (n < 0)
+ rte_panic("cannot write on configuration pipe\n");
+
+ if (lcore_config[lcore_id].f == NULL)
+ rte_panic("NULL function pointer\n");
+
+ /* call the function and store the return value */
+ fct_arg = lcore_config[lcore_id].arg;
+ ret = lcore_config[lcore_id].f(fct_arg);
+ lcore_config[lcore_id].ret = ret;
+ rte_wmb();
+ lcore_config[lcore_id].state = FINISHED;
+ }
+
+ /* never reached */
+ /* pthread_exit(NULL); */
+ /* return NULL; */
+}
+
+/* require calling thread tid by gettid() */
+int rte_sys_gettid(void)
+{
+ long lwpid;
+ thr_self(&lwpid);
+ return (int)lwpid;
+}
diff --git a/lib/librte_eal/bsdapp/eal/eal_timer.c b/lib/librte_eal/bsdapp/eal/eal_timer.c
new file mode 100644
index 00000000..f12d9bd2
--- /dev/null
+++ b/lib/librte_eal/bsdapp/eal/eal_timer.c
@@ -0,0 +1,94 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#include <errno.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_cycles.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_eal.h>
+#include <rte_debug.h>
+
+#include "eal_private.h"
+#include "eal_internal_cfg.h"
+
+#ifdef RTE_LIBEAL_USE_HPET
+#warning HPET is not supported in FreeBSD
+#endif
+
+enum timer_source eal_timer_source = EAL_TIMER_TSC;
+
+uint64_t
+get_tsc_freq(void)
+{
+ size_t sz;
+ int tmp;
+ uint64_t tsc_hz;
+
+ sz = sizeof(tmp);
+ tmp = 0;
+
+ if (sysctlbyname("kern.timecounter.smp_tsc", &tmp, &sz, NULL, 0))
+ RTE_LOG(WARNING, EAL, "%s\n", strerror(errno));
+ else if (tmp != 1)
+ RTE_LOG(WARNING, EAL, "TSC is not safe to use in SMP mode\n");
+
+ tmp = 0;
+
+ if (sysctlbyname("kern.timecounter.invariant_tsc", &tmp, &sz, NULL, 0))
+ RTE_LOG(WARNING, EAL, "%s\n", strerror(errno));
+ else if (tmp != 1)
+ RTE_LOG(WARNING, EAL, "TSC is not invariant\n");
+
+ sz = sizeof(tsc_hz);
+ if (sysctlbyname("machdep.tsc_freq", &tsc_hz, &sz, NULL, 0)) {
+ RTE_LOG(WARNING, EAL, "%s\n", strerror(errno));
+ return 0;
+ }
+
+ return tsc_hz;
+}
+
+int
+rte_eal_timer_init(void)
+{
+ set_tsc_freq();
+ return 0;
+}
diff --git a/lib/librte_eal/bsdapp/eal/include/exec-env/rte_dom0_common.h b/lib/librte_eal/bsdapp/eal/include/exec-env/rte_dom0_common.h
new file mode 100644
index 00000000..99a33432
--- /dev/null
+++ b/lib/librte_eal/bsdapp/eal/include/exec-env/rte_dom0_common.h
@@ -0,0 +1,107 @@
+/*-
+ * This file is provided under a dual BSD/LGPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GNU LESSER GENERAL PUBLIC LICENSE
+ *
+ * Copyright(c) 2007-2014 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Contact Information:
+ * Intel Corporation
+ *
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef _RTE_DOM0_COMMON_H_
+#define _RTE_DOM0_COMMON_H_
+
+#ifdef __KERNEL__
+#include <linux/if.h>
+#endif
+
+#define DOM0_NAME_MAX 256
+#define DOM0_MM_DEV "/dev/dom0_mm"
+
+#define DOM0_CONTIG_NUM_ORDER 9 /**< 2M order */
+#define DOM0_NUM_MEMSEG 512 /**< Maximum nb. of memory segment. */
+#define DOM0_MEMBLOCK_SIZE 0x200000 /**< Maximum nb. of memory block(2M). */
+#define DOM0_CONFIG_MEMSIZE 4096 /**< Maximum config memory size(4G). */
+#define DOM0_NUM_MEMBLOCK (DOM0_CONFIG_MEMSIZE / 2) /**< Maximum nb. of 2M memory block. */
+
+#define RTE_DOM0_IOCTL_PREPARE_MEMSEG _IOWR(0, 1 , struct memory_info)
+#define RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG _IOWR(0, 2 , char *)
+#define RTE_DOM0_IOCTL_GET_NUM_MEMSEG _IOWR(0, 3, int)
+#define RTE_DOM0_IOCTL_GET_MEMSEG_INFO _IOWR(0, 4, void *)
+
+/**
+ * A structure used to store memory information.
+ */
+struct memory_info {
+ char name[DOM0_NAME_MAX];
+ uint64_t size;
+};
+
+/**
+ * A structure used to store memory segment information.
+ */
+struct memseg_info {
+ uint32_t idx;
+ uint64_t pfn;
+ uint64_t size;
+ uint64_t mfn[DOM0_NUM_MEMBLOCK];
+};
+
+/**
+ * A structure used to store memory block information.
+ */
+struct memblock_info {
+ uint8_t exchange_flag;
+ uint64_t vir_addr;
+ uint64_t pfn;
+ uint64_t mfn;
+};
+#endif /* _RTE_DOM0_COMMON_H_ */
diff --git a/lib/librte_eal/bsdapp/eal/include/exec-env/rte_interrupts.h b/lib/librte_eal/bsdapp/eal/include/exec-env/rte_interrupts.h
new file mode 100644
index 00000000..c1995ee1
--- /dev/null
+++ b/lib/librte_eal/bsdapp/eal/include/exec-env/rte_interrupts.h
@@ -0,0 +1,137 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_INTERRUPTS_H_
+#error "don't include this file directly, please include generic <rte_interrupts.h>"
+#endif
+
+#ifndef _RTE_BSDAPP_INTERRUPTS_H_
+#define _RTE_BSDAPP_INTERRUPTS_H_
+
+#define RTE_INTR_VEC_ZERO_OFFSET 0
+#define RTE_INTR_VEC_RXTX_OFFSET 1
+
+#define RTE_MAX_RXTX_INTR_VEC_ID 32
+
+enum rte_intr_handle_type {
+ RTE_INTR_HANDLE_UNKNOWN = 0,
+ RTE_INTR_HANDLE_UIO, /**< uio device handle */
+ RTE_INTR_HANDLE_ALARM, /**< alarm handle */
+ RTE_INTR_HANDLE_MAX
+};
+
+/** Handle for interrupts. */
+struct rte_intr_handle {
+ int fd; /**< file descriptor */
+ int uio_cfg_fd; /**< UIO config file descriptor */
+ enum rte_intr_handle_type type; /**< handle type */
+ int max_intr; /**< max interrupt requested */
+ uint32_t nb_efd; /**< number of available efds */
+ int *intr_vec; /**< intr vector number array */
+};
+
+/**
+ * @param intr_handle
+ * Pointer to the interrupt handle.
+ * @param epfd
+ * Epoll instance fd which the intr vector associated to.
+ * @param op
+ * The operation be performed for the vector.
+ * Operation type of {ADD, DEL}.
+ * @param vec
+ * RX intr vector number added to the epoll instance wait list.
+ * @param data
+ * User raw data.
+ * @return
+ * - On success, zero.
+ * - On failure, a negative value.
+ */
+int
+rte_intr_rx_ctl(struct rte_intr_handle *intr_handle,
+ int epfd, int op, unsigned int vec, void *data);
+
+/**
+ * It enables the fastpath event fds if it's necessary.
+ * It creates event fds when multi-vectors allowed,
+ * otherwise it multiplexes the single event fds.
+ *
+ * @param intr_handle
+ * Pointer to the interrupt handle.
+ * @param nb_efd
+ * Number of interrupt vector trying to enable.
+ * The value 0 is not allowed.
+ * @return
+ * - On success, zero.
+ * - On failure, a negative value.
+ */
+int
+rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd);
+
+/**
+ * It disable the fastpath event fds.
+ * It deletes registered eventfds and closes the open fds.
+ *
+ * @param intr_handle
+ * Pointer to the interrupt handle.
+ */
+void
+rte_intr_efd_disable(struct rte_intr_handle *intr_handle);
+
+/**
+ * The fastpath interrupt is enabled or not.
+ *
+ * @param intr_handle
+ * Pointer to the interrupt handle.
+ */
+int rte_intr_dp_is_en(struct rte_intr_handle *intr_handle);
+
+/**
+ * The interrupt handle instance allows other cause or not.
+ * Other cause stands for none fastpath interrupt.
+ *
+ * @param intr_handle
+ * Pointer to the interrupt handle.
+ */
+int rte_intr_allow_others(struct rte_intr_handle *intr_handle);
+
+/**
+ * The multiple interrupt vector capability of interrupt handle instance.
+ * It returns zero if no multiple interrupt vector support.
+ *
+ * @param intr_handle
+ * Pointer to the interrupt handle.
+ */
+int
+rte_intr_cap_multiple(struct rte_intr_handle *intr_handle);
+
+#endif /* _RTE_BSDAPP_INTERRUPTS_H_ */
diff --git a/lib/librte_eal/bsdapp/eal/rte_eal_version.map b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
new file mode 100644
index 00000000..58c2951e
--- /dev/null
+++ b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
@@ -0,0 +1,153 @@
+DPDK_2.0 {
+ global:
+
+ __rte_panic;
+ devargs_list;
+ eal_parse_sysfs_value;
+ eal_timer_source;
+ lcore_config;
+ pci_device_list;
+ pci_driver_list;
+ per_lcore__lcore_id;
+ per_lcore__rte_errno;
+ rte_calloc;
+ rte_calloc_socket;
+ rte_cpu_check_supported;
+ rte_cpu_get_flag_enabled;
+ rte_cycles_vmware_tsc_map;
+ rte_delay_us;
+ rte_dump_physmem_layout;
+ rte_dump_registers;
+ rte_dump_stack;
+ rte_dump_tailq;
+ rte_eal_alarm_cancel;
+ rte_eal_alarm_set;
+ rte_eal_dev_init;
+ rte_eal_devargs_add;
+ rte_eal_devargs_dump;
+ rte_eal_devargs_type_count;
+ rte_eal_driver_register;
+ rte_eal_driver_unregister;
+ rte_eal_get_configuration;
+ rte_eal_get_lcore_state;
+ rte_eal_get_physmem_layout;
+ rte_eal_get_physmem_size;
+ rte_eal_has_hugepages;
+ rte_eal_hpet_init;
+ rte_eal_init;
+ rte_eal_iopl_init;
+ rte_eal_lcore_role;
+ rte_eal_mp_remote_launch;
+ rte_eal_mp_wait_lcore;
+ rte_eal_parse_devargs_str;
+ rte_eal_pci_dump;
+ rte_eal_pci_probe;
+ rte_eal_pci_probe_one;
+ rte_eal_pci_register;
+ rte_eal_pci_scan;
+ rte_eal_pci_unregister;
+ rte_eal_process_type;
+ rte_eal_remote_launch;
+ rte_eal_tailq_lookup;
+ rte_eal_tailq_register;
+ rte_eal_vdev_init;
+ rte_eal_vdev_uninit;
+ rte_eal_wait_lcore;
+ rte_exit;
+ rte_free;
+ rte_get_hpet_cycles;
+ rte_get_hpet_hz;
+ rte_get_log_level;
+ rte_get_log_type;
+ rte_get_tsc_hz;
+ rte_hexdump;
+ rte_intr_callback_register;
+ rte_intr_callback_unregister;
+ rte_intr_disable;
+ rte_intr_enable;
+ rte_log;
+ rte_log_add_in_history;
+ rte_log_cur_msg_loglevel;
+ rte_log_cur_msg_logtype;
+ rte_log_dump_history;
+ rte_log_set_history;
+ rte_logs;
+ rte_malloc;
+ rte_malloc_dump_stats;
+ rte_malloc_get_socket_stats;
+ rte_malloc_set_limit;
+ rte_malloc_socket;
+ rte_malloc_validate;
+ rte_malloc_virt2phy;
+ rte_mem_lock_page;
+ rte_mem_phy2mch;
+ rte_mem_virt2phy;
+ rte_memdump;
+ rte_memory_get_nchannel;
+ rte_memory_get_nrank;
+ rte_memzone_dump;
+ rte_memzone_lookup;
+ rte_memzone_reserve;
+ rte_memzone_reserve_aligned;
+ rte_memzone_reserve_bounded;
+ rte_memzone_walk;
+ rte_openlog_stream;
+ rte_realloc;
+ rte_set_application_usage_hook;
+ rte_set_log_level;
+ rte_set_log_type;
+ rte_socket_id;
+ rte_strerror;
+ rte_strsplit;
+ rte_sys_gettid;
+ rte_thread_get_affinity;
+ rte_thread_set_affinity;
+ rte_vlog;
+ rte_xen_dom0_memory_attach;
+ rte_xen_dom0_memory_init;
+ rte_zmalloc;
+ rte_zmalloc_socket;
+
+ local: *;
+};
+
+DPDK_2.1 {
+ global:
+
+ rte_eal_pci_detach;
+ rte_eal_pci_read_config;
+ rte_eal_pci_write_config;
+ rte_intr_allow_others;
+ rte_intr_dp_is_en;
+ rte_intr_efd_disable;
+ rte_intr_efd_enable;
+ rte_intr_rx_ctl;
+ rte_memzone_free;
+
+} DPDK_2.0;
+
+DPDK_2.2 {
+ global:
+
+ rte_intr_cap_multiple;
+ rte_keepalive_create;
+ rte_keepalive_dispatch_pings;
+ rte_keepalive_mark_alive;
+ rte_keepalive_register_core;
+ rte_xen_dom0_supported;
+
+} DPDK_2.1;
+
+DPDK_16.04 {
+ global:
+
+ rte_cpu_get_flag_name;
+ rte_eal_pci_ioport_map;
+ rte_eal_pci_ioport_read;
+ rte_eal_pci_ioport_unmap;
+ rte_eal_pci_ioport_write;
+ rte_eal_pci_map_device;
+ rte_eal_pci_unmap_device;
+ rte_eal_primary_proc_alive;
+
+} DPDK_2.2;
diff --git a/lib/librte_eal/bsdapp/nic_uio/BSDmakefile b/lib/librte_eal/bsdapp/nic_uio/BSDmakefile
new file mode 100644
index 00000000..5454ed85
--- /dev/null
+++ b/lib/librte_eal/bsdapp/nic_uio/BSDmakefile
@@ -0,0 +1,36 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+KMOD= nic_uio
+SRCS= nic_uio.c device_if.h bus_if.h pci_if.h
+
+.include <bsd.kmod.mk>
diff --git a/lib/librte_eal/bsdapp/nic_uio/Makefile b/lib/librte_eal/bsdapp/nic_uio/Makefile
new file mode 100644
index 00000000..89957615
--- /dev/null
+++ b/lib/librte_eal/bsdapp/nic_uio/Makefile
@@ -0,0 +1,52 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# module name and path
+#
+MODULE = nic_uio
+
+#
+# CFLAGS
+#
+MODULE_CFLAGS += -I$(SRCDIR)
+MODULE_CFLAGS += -I$(RTE_OUTPUT)/include
+MODULE_CFLAGS += -Winline -Wall -Werror
+MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-y := nic_uio.c
+
+include $(RTE_SDK)/mk/rte.bsdmodule.mk
diff --git a/lib/librte_eal/bsdapp/nic_uio/nic_uio.c b/lib/librte_eal/bsdapp/nic_uio/nic_uio.c
new file mode 100644
index 00000000..99a4975c
--- /dev/null
+++ b/lib/librte_eal/bsdapp/nic_uio/nic_uio.c
@@ -0,0 +1,335 @@
+/* -
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h> /* defines used in kernel.h */
+#include <sys/module.h>
+#include <sys/kernel.h> /* types used in module initialization */
+#include <sys/conf.h> /* cdevsw struct */
+#include <sys/bus.h> /* structs, prototypes for pci bus stuff and DEVMETHOD */
+#include <sys/rman.h>
+#include <sys/systm.h>
+#include <sys/rwlock.h>
+#include <sys/proc.h>
+
+#include <machine/bus.h>
+#include <dev/pci/pcivar.h> /* For pci_get macros! */
+#include <dev/pci/pcireg.h> /* The softc holds our per-instance data. */
+#include <vm/vm.h>
+#include <vm/uma.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pager.h>
+
+
+#define MAX_BARS (PCIR_MAX_BAR_0 + 1)
+
+#define MAX_DETACHED_DEVICES 128
+static device_t detached_devices[MAX_DETACHED_DEVICES] = {};
+static int num_detached = 0;
+
+struct nic_uio_softc {
+ device_t dev_t;
+ struct cdev *my_cdev;
+ int bar_id[MAX_BARS];
+ struct resource *bar_res[MAX_BARS];
+ u_long bar_start[MAX_BARS];
+ u_long bar_size[MAX_BARS];
+};
+
+/* Function prototypes */
+static d_open_t nic_uio_open;
+static d_close_t nic_uio_close;
+static d_mmap_t nic_uio_mmap;
+static d_mmap_single_t nic_uio_mmap_single;
+static int nic_uio_probe(device_t dev);
+static int nic_uio_attach(device_t dev);
+static int nic_uio_detach(device_t dev);
+static int nic_uio_shutdown(void);
+static int nic_uio_modevent(module_t mod, int type, void *arg);
+
+static struct cdevsw uio_cdevsw = {
+ .d_name = "nic_uio",
+ .d_version = D_VERSION,
+ .d_open = nic_uio_open,
+ .d_close = nic_uio_close,
+ .d_mmap = nic_uio_mmap,
+ .d_mmap_single = nic_uio_mmap_single,
+};
+
+static device_method_t nic_uio_methods[] = {
+ DEVMETHOD(device_probe, nic_uio_probe),
+ DEVMETHOD(device_attach, nic_uio_attach),
+ DEVMETHOD(device_detach, nic_uio_detach),
+ DEVMETHOD_END
+};
+
+struct device {
+ int vend;
+ int dev;
+};
+
+struct pci_bdf {
+ uint32_t bus;
+ uint32_t devid;
+ uint32_t function;
+};
+
+static devclass_t nic_uio_devclass;
+
+DEFINE_CLASS_0(nic_uio, nic_uio_driver, nic_uio_methods, sizeof(struct nic_uio_softc));
+DRIVER_MODULE(nic_uio, pci, nic_uio_driver, nic_uio_devclass, nic_uio_modevent, 0);
+
+static int
+nic_uio_mmap(struct cdev *cdev, vm_ooffset_t offset, vm_paddr_t *paddr,
+ int prot, vm_memattr_t *memattr)
+{
+ *paddr = offset;
+ return 0;
+}
+
+static int
+nic_uio_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size,
+ struct vm_object **obj, int nprot)
+{
+ /*
+ * The BAR index is encoded in the offset. Divide the offset by
+ * PAGE_SIZE to get the index of the bar requested by the user
+ * app.
+ */
+ unsigned bar = *offset/PAGE_SIZE;
+ struct nic_uio_softc *sc = cdev->si_drv1;
+
+ if (bar >= MAX_BARS)
+ return EINVAL;
+
+ if (sc->bar_res[bar] == NULL) {
+ sc->bar_id[bar] = PCIR_BAR(bar);
+
+ if (PCI_BAR_IO(pci_read_config(sc->dev_t, sc->bar_id[bar], 4)))
+ sc->bar_res[bar] = bus_alloc_resource_any(sc->dev_t, SYS_RES_IOPORT,
+ &sc->bar_id[bar], RF_ACTIVE);
+ else
+ sc->bar_res[bar] = bus_alloc_resource_any(sc->dev_t, SYS_RES_MEMORY,
+ &sc->bar_id[bar], RF_ACTIVE);
+ }
+ if (sc->bar_res[bar] == NULL)
+ return ENXIO;
+
+ sc->bar_start[bar] = rman_get_start(sc->bar_res[bar]);
+ sc->bar_size[bar] = rman_get_size(sc->bar_res[bar]);
+
+ device_printf(sc->dev_t, "Bar %u @ %lx, size %lx\n", bar,
+ sc->bar_start[bar], sc->bar_size[bar]);
+
+ *offset = sc->bar_start[bar];
+ *obj = vm_pager_allocate(OBJT_DEVICE, cdev, size, nprot, *offset,
+ curthread->td_ucred);
+ return 0;
+}
+
+
+int
+nic_uio_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
+{
+ return 0;
+}
+
+int
+nic_uio_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
+{
+ return 0;
+}
+
+static int
+nic_uio_probe (device_t dev)
+{
+ int i;
+ unsigned int bus = pci_get_bus(dev);
+ unsigned int device = pci_get_slot(dev);
+ unsigned int function = pci_get_function(dev);
+
+ for (i = 0; i < num_detached; i++)
+ if (bus == pci_get_bus(detached_devices[i]) &&
+ device == pci_get_slot(detached_devices[i]) &&
+ function == pci_get_function(detached_devices[i])) {
+ device_set_desc(dev, "DPDK PCI Device");
+ return BUS_PROBE_SPECIFIC;
+ }
+
+ return ENXIO;
+}
+
+static int
+nic_uio_attach(device_t dev)
+{
+ int i;
+ struct nic_uio_softc *sc;
+
+ sc = device_get_softc(dev);
+ sc->dev_t = dev;
+ sc->my_cdev = make_dev(&uio_cdevsw, device_get_unit(dev),
+ UID_ROOT, GID_WHEEL, 0600, "uio@pci:%u:%u:%u",
+ pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
+ if (sc->my_cdev == NULL)
+ return ENXIO;
+ sc->my_cdev->si_drv1 = sc;
+
+ for (i = 0; i < MAX_BARS; i++)
+ sc->bar_res[i] = NULL;
+
+ pci_enable_busmaster(dev);
+
+ return 0;
+}
+
+static int
+nic_uio_detach(device_t dev)
+{
+ int i;
+ struct nic_uio_softc *sc;
+ sc = device_get_softc(dev);
+
+ for (i = 0; i < MAX_BARS; i++)
+ if (sc->bar_res[i] != NULL) {
+
+ if (PCI_BAR_IO(pci_read_config(dev, sc->bar_id[i], 4)))
+ bus_release_resource(dev, SYS_RES_IOPORT, sc->bar_id[i],
+ sc->bar_res[i]);
+ else
+ bus_release_resource(dev, SYS_RES_MEMORY, sc->bar_id[i],
+ sc->bar_res[i]);
+ }
+
+ if (sc->my_cdev != NULL)
+ destroy_dev(sc->my_cdev);
+ return 0;
+}
+
+static void
+nic_uio_load(void)
+{
+ uint32_t bus, device, function;
+ device_t dev;
+ char bdf_str[256];
+ char *token, *remaining;
+
+ memset(bdf_str, 0, sizeof(bdf_str));
+ TUNABLE_STR_FETCH("hw.nic_uio.bdfs", bdf_str, sizeof(bdf_str));
+ remaining = bdf_str;
+ /*
+ * Users should specify PCI BDFs in the format "b:d:f,b:d:f,b:d:f".
+ * But the code below does not try differentiate between : and ,
+ * and just blindly uses 3 tokens at a time to construct a
+ * bus/device/function tuple.
+ *
+ * There is no checking on strtol() return values, but this should
+ * be OK. Worst case is it cannot convert and returns 0. This
+ * could give us a different BDF than intended, but as long as the
+ * PCI device/vendor ID does not match it will not matter.
+ */
+ while (1) {
+ if (remaining == NULL || remaining[0] == '\0')
+ break;
+ token = strsep(&remaining, ",:");
+ if (token == NULL)
+ break;
+ bus = strtol(token, NULL, 10);
+ token = strsep(&remaining, ",:");
+ if (token == NULL)
+ break;
+ device = strtol(token, NULL, 10);
+ token = strsep(&remaining, ",:");
+ if (token == NULL)
+ break;
+ function = strtol(token, NULL, 10);
+
+ dev = pci_find_bsf(bus, device, function);
+ if (dev == NULL)
+ continue;
+
+ if (num_detached < MAX_DETACHED_DEVICES) {
+ printf("nic_uio_load: detaching and storing dev=%p\n",
+ dev);
+ detached_devices[num_detached++] = dev;
+ } else {
+ printf("nic_uio_load: reached MAX_DETACHED_DEVICES=%d. dev=%p won't be reattached\n",
+ MAX_DETACHED_DEVICES, dev);
+ }
+ device_detach(dev);
+ }
+}
+
+static void
+nic_uio_unload(void)
+{
+ int i;
+ printf("nic_uio_unload: entered...\n");
+
+ for (i = 0; i < num_detached; i++) {
+ printf("nic_uio_unload: calling to device_probe_and_attach for dev=%p...\n",
+ detached_devices[i]);
+ device_probe_and_attach(detached_devices[i]);
+ printf("nic_uio_unload: done.\n");
+ }
+
+ printf("nic_uio_unload: leaving...\n");
+}
+
+static int
+nic_uio_shutdown(void)
+{
+ return 0;
+}
+
+static int
+nic_uio_modevent(module_t mod, int type, void *arg)
+{
+
+ switch (type) {
+ case MOD_LOAD:
+ nic_uio_load();
+ break;
+ case MOD_UNLOAD:
+ nic_uio_unload();
+ break;
+ case MOD_SHUTDOWN:
+ nic_uio_shutdown();
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
new file mode 100644
index 00000000..f5ea0ee8
--- /dev/null
+++ b/lib/librte_eal/common/Makefile
@@ -0,0 +1,61 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+INC := rte_branch_prediction.h rte_common.h
+INC += rte_debug.h rte_eal.h rte_errno.h rte_launch.h rte_lcore.h
+INC += rte_log.h rte_memory.h rte_memzone.h rte_pci.h
+INC += rte_pci_dev_ids.h rte_per_lcore.h rte_random.h
+INC += rte_tailq.h rte_interrupts.h rte_alarm.h
+INC += rte_string_fns.h rte_version.h
+INC += rte_eal_memconfig.h rte_malloc_heap.h
+INC += rte_hexdump.h rte_devargs.h rte_dev.h
+INC += rte_pci_dev_feature_defs.h rte_pci_dev_features.h
+INC += rte_malloc.h rte_keepalive.h rte_time.h
+
+ifeq ($(CONFIG_RTE_INSECURE_FUNCTION_WARNING),y)
+INC += rte_warnings.h
+endif
+
+GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h rte_prefetch.h
+GENERIC_INC += rte_spinlock.h rte_memcpy.h rte_cpuflags.h rte_rwlock.h
+# defined in mk/arch/$(RTE_ARCH)/rte.vars.mk
+ARCH_DIR ?= $(RTE_ARCH)
+ARCH_INC := $(notdir $(wildcard $(RTE_SDK)/lib/librte_eal/common/include/arch/$(ARCH_DIR)/*.h))
+
+SYMLINK-$(CONFIG_RTE_LIBRTE_EAL)-include := $(addprefix include/,$(INC))
+SYMLINK-$(CONFIG_RTE_LIBRTE_EAL)-include += \
+ $(addprefix include/arch/$(ARCH_DIR)/,$(ARCH_INC))
+SYMLINK-$(CONFIG_RTE_LIBRTE_EAL)-include/generic := \
+ $(addprefix include/generic/,$(GENERIC_INC))
+
+include $(RTE_SDK)/mk/rte.install.mk
diff --git a/lib/librte_eal/common/arch/arm/rte_cpuflags.c b/lib/librte_eal/common/arch/arm/rte_cpuflags.c
new file mode 100644
index 00000000..23240efd
--- /dev/null
+++ b/lib/librte_eal/common/arch/arm/rte_cpuflags.c
@@ -0,0 +1,179 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) Cavium networks Ltd. 2015.
+ * Copyright(c) 2015 RehiveTech. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Cavium networks nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "rte_cpuflags.h"
+
+#include <elf.h>
+#include <fcntl.h>
+#include <assert.h>
+#include <unistd.h>
+#include <string.h>
+
+#ifndef AT_HWCAP
+#define AT_HWCAP 16
+#endif
+
+#ifndef AT_HWCAP2
+#define AT_HWCAP2 26
+#endif
+
+#ifndef AT_PLATFORM
+#define AT_PLATFORM 15
+#endif
+
+enum cpu_register_t {
+ REG_NONE = 0,
+ REG_HWCAP,
+ REG_HWCAP2,
+ REG_PLATFORM,
+ REG_MAX
+};
+
+typedef uint32_t hwcap_registers_t[REG_MAX];
+
+/**
+ * Struct to hold a processor feature entry
+ */
+struct feature_entry {
+ uint32_t reg;
+ uint32_t bit;
+#define CPU_FLAG_NAME_MAX_LEN 64
+ char name[CPU_FLAG_NAME_MAX_LEN];
+};
+
+#define FEAT_DEF(name, reg, bit) \
+ [RTE_CPUFLAG_##name] = {reg, bit, #name},
+
+#ifdef RTE_ARCH_ARMv7
+#define PLATFORM_STR "v7l"
+typedef Elf32_auxv_t _Elfx_auxv_t;
+
+const struct feature_entry rte_cpu_feature_table[] = {
+ FEAT_DEF(SWP, REG_HWCAP, 0)
+ FEAT_DEF(HALF, REG_HWCAP, 1)
+ FEAT_DEF(THUMB, REG_HWCAP, 2)
+ FEAT_DEF(A26BIT, REG_HWCAP, 3)
+ FEAT_DEF(FAST_MULT, REG_HWCAP, 4)
+ FEAT_DEF(FPA, REG_HWCAP, 5)
+ FEAT_DEF(VFP, REG_HWCAP, 6)
+ FEAT_DEF(EDSP, REG_HWCAP, 7)
+ FEAT_DEF(JAVA, REG_HWCAP, 8)
+ FEAT_DEF(IWMMXT, REG_HWCAP, 9)
+ FEAT_DEF(CRUNCH, REG_HWCAP, 10)
+ FEAT_DEF(THUMBEE, REG_HWCAP, 11)
+ FEAT_DEF(NEON, REG_HWCAP, 12)
+ FEAT_DEF(VFPv3, REG_HWCAP, 13)
+ FEAT_DEF(VFPv3D16, REG_HWCAP, 14)
+ FEAT_DEF(TLS, REG_HWCAP, 15)
+ FEAT_DEF(VFPv4, REG_HWCAP, 16)
+ FEAT_DEF(IDIVA, REG_HWCAP, 17)
+ FEAT_DEF(IDIVT, REG_HWCAP, 18)
+ FEAT_DEF(VFPD32, REG_HWCAP, 19)
+ FEAT_DEF(LPAE, REG_HWCAP, 20)
+ FEAT_DEF(EVTSTRM, REG_HWCAP, 21)
+ FEAT_DEF(AES, REG_HWCAP2, 0)
+ FEAT_DEF(PMULL, REG_HWCAP2, 1)
+ FEAT_DEF(SHA1, REG_HWCAP2, 2)
+ FEAT_DEF(SHA2, REG_HWCAP2, 3)
+ FEAT_DEF(CRC32, REG_HWCAP2, 4)
+ FEAT_DEF(V7L, REG_PLATFORM, 0)
+};
+
+#elif defined RTE_ARCH_ARM64
+#define PLATFORM_STR "aarch64"
+typedef Elf64_auxv_t _Elfx_auxv_t;
+
+const struct feature_entry rte_cpu_feature_table[] = {
+ FEAT_DEF(FP, REG_HWCAP, 0)
+ FEAT_DEF(NEON, REG_HWCAP, 1)
+ FEAT_DEF(EVTSTRM, REG_HWCAP, 2)
+ FEAT_DEF(AES, REG_HWCAP, 3)
+ FEAT_DEF(PMULL, REG_HWCAP, 4)
+ FEAT_DEF(SHA1, REG_HWCAP, 5)
+ FEAT_DEF(SHA2, REG_HWCAP, 6)
+ FEAT_DEF(CRC32, REG_HWCAP, 7)
+ FEAT_DEF(ATOMICS, REG_HWCAP, 8)
+ FEAT_DEF(AARCH64, REG_PLATFORM, 1)
+};
+#endif /* RTE_ARCH */
+
+/*
+ * Read AUXV software register and get cpu features for ARM
+ */
+static void
+rte_cpu_get_features(hwcap_registers_t out)
+{
+ int auxv_fd;
+ _Elfx_auxv_t auxv;
+
+ auxv_fd = open("/proc/self/auxv", O_RDONLY);
+ assert(auxv_fd);
+ while (read(auxv_fd, &auxv, sizeof(auxv)) == sizeof(auxv)) {
+ if (auxv.a_type == AT_HWCAP) {
+ out[REG_HWCAP] = auxv.a_un.a_val;
+ } else if (auxv.a_type == AT_HWCAP2) {
+ out[REG_HWCAP2] = auxv.a_un.a_val;
+ } else if (auxv.a_type == AT_PLATFORM) {
+ if (!strcmp((const char *)auxv.a_un.a_val, PLATFORM_STR))
+ out[REG_PLATFORM] = 0x0001;
+ }
+ }
+}
+
+/*
+ * Checks if a particular flag is available on current machine.
+ */
+int
+rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature)
+{
+ const struct feature_entry *feat;
+ hwcap_registers_t regs = {0};
+
+ if (feature >= RTE_CPUFLAG_NUMFLAGS)
+ return -ENOENT;
+
+ feat = &rte_cpu_feature_table[feature];
+ if (feat->reg == REG_NONE)
+ return -EFAULT;
+
+ rte_cpu_get_features(regs);
+ return (regs[feat->reg] >> feat->bit) & 1;
+}
+
+const char *
+rte_cpu_get_flag_name(enum rte_cpu_flag_t feature)
+{
+ if (feature >= RTE_CPUFLAG_NUMFLAGS)
+ return NULL;
+ return rte_cpu_feature_table[feature].name;
+}
diff --git a/lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c b/lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c
new file mode 100644
index 00000000..a8147c86
--- /dev/null
+++ b/lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c
@@ -0,0 +1,147 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) IBM Corporation 2014.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of IBM Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "rte_cpuflags.h"
+
+#include <elf.h>
+#include <fcntl.h>
+#include <assert.h>
+#include <unistd.h>
+
+/* Symbolic values for the entries in the auxiliary table */
+#define AT_HWCAP 16
+#define AT_HWCAP2 26
+
+/* software based registers */
+enum cpu_register_t {
+ REG_NONE = 0,
+ REG_HWCAP,
+ REG_HWCAP2,
+ REG_MAX
+};
+
+typedef uint32_t hwcap_registers_t[REG_MAX];
+
+struct feature_entry {
+ uint32_t reg;
+ uint32_t bit;
+#define CPU_FLAG_NAME_MAX_LEN 64
+ char name[CPU_FLAG_NAME_MAX_LEN];
+};
+
+#define FEAT_DEF(name, reg, bit) \
+ [RTE_CPUFLAG_##name] = {reg, bit, #name},
+
+const struct feature_entry rte_cpu_feature_table[] = {
+ FEAT_DEF(PPC_LE, REG_HWCAP, 0)
+ FEAT_DEF(TRUE_LE, REG_HWCAP, 1)
+ FEAT_DEF(PSERIES_PERFMON_COMPAT, REG_HWCAP, 6)
+ FEAT_DEF(VSX, REG_HWCAP, 7)
+ FEAT_DEF(ARCH_2_06, REG_HWCAP, 8)
+ FEAT_DEF(POWER6_EXT, REG_HWCAP, 9)
+ FEAT_DEF(DFP, REG_HWCAP, 10)
+ FEAT_DEF(PA6T, REG_HWCAP, 11)
+ FEAT_DEF(ARCH_2_05, REG_HWCAP, 12)
+ FEAT_DEF(ICACHE_SNOOP, REG_HWCAP, 13)
+ FEAT_DEF(SMT, REG_HWCAP, 14)
+ FEAT_DEF(BOOKE, REG_HWCAP, 15)
+ FEAT_DEF(CELLBE, REG_HWCAP, 16)
+ FEAT_DEF(POWER5_PLUS, REG_HWCAP, 17)
+ FEAT_DEF(POWER5, REG_HWCAP, 18)
+ FEAT_DEF(POWER4, REG_HWCAP, 19)
+ FEAT_DEF(NOTB, REG_HWCAP, 20)
+ FEAT_DEF(EFP_DOUBLE, REG_HWCAP, 21)
+ FEAT_DEF(EFP_SINGLE, REG_HWCAP, 22)
+ FEAT_DEF(SPE, REG_HWCAP, 23)
+ FEAT_DEF(UNIFIED_CACHE, REG_HWCAP, 24)
+ FEAT_DEF(4xxMAC, REG_HWCAP, 25)
+ FEAT_DEF(MMU, REG_HWCAP, 26)
+ FEAT_DEF(FPU, REG_HWCAP, 27)
+ FEAT_DEF(ALTIVEC, REG_HWCAP, 28)
+ FEAT_DEF(PPC601, REG_HWCAP, 29)
+ FEAT_DEF(PPC64, REG_HWCAP, 30)
+ FEAT_DEF(PPC32, REG_HWCAP, 31)
+ FEAT_DEF(TAR, REG_HWCAP2, 26)
+ FEAT_DEF(LSEL, REG_HWCAP2, 27)
+ FEAT_DEF(EBB, REG_HWCAP2, 28)
+ FEAT_DEF(DSCR, REG_HWCAP2, 29)
+ FEAT_DEF(HTM, REG_HWCAP2, 30)
+ FEAT_DEF(ARCH_2_07, REG_HWCAP2, 31)
+};
+
+/*
+ * Read AUXV software register and get cpu features for Power
+ */
+static void
+rte_cpu_get_features(hwcap_registers_t out)
+{
+ int auxv_fd;
+ Elf64_auxv_t auxv;
+
+ auxv_fd = open("/proc/self/auxv", O_RDONLY);
+ assert(auxv_fd);
+ while (read(auxv_fd, &auxv,
+ sizeof(Elf64_auxv_t)) == sizeof(Elf64_auxv_t)) {
+ if (auxv.a_type == AT_HWCAP)
+ out[REG_HWCAP] = auxv.a_un.a_val;
+ else if (auxv.a_type == AT_HWCAP2)
+ out[REG_HWCAP2] = auxv.a_un.a_val;
+ }
+}
+
+/*
+ * Checks if a particular flag is available on current machine.
+ */
+int
+rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature)
+{
+ const struct feature_entry *feat;
+ hwcap_registers_t regs = {0};
+
+ if (feature >= RTE_CPUFLAG_NUMFLAGS)
+ return -ENOENT;
+
+ feat = &rte_cpu_feature_table[feature];
+ if (feat->reg == REG_NONE)
+ return -EFAULT;
+
+ rte_cpu_get_features(regs);
+ return (regs[feat->reg] >> feat->bit) & 1;
+}
+
+const char *
+rte_cpu_get_flag_name(enum rte_cpu_flag_t feature)
+{
+ if (feature >= RTE_CPUFLAG_NUMFLAGS)
+ return NULL;
+ return rte_cpu_feature_table[feature].name;
+}
diff --git a/lib/librte_eal/common/arch/tile/rte_cpuflags.c b/lib/librte_eal/common/arch/tile/rte_cpuflags.c
new file mode 100644
index 00000000..a2b6c51a
--- /dev/null
+++ b/lib/librte_eal/common/arch/tile/rte_cpuflags.c
@@ -0,0 +1,47 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) EZchip Semiconductor Ltd. 2015.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of EZchip Semiconductor nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "rte_cpuflags.h"
+
+#include <errno.h>
+
+const struct feature_entry rte_cpu_feature_table[] = {
+};
+
+/*
+ * Checks if a particular flag is available on current machine.
+ */
+int
+rte_cpu_get_flag_enabled(__attribute__((unused)) enum rte_cpu_flag_t feature)
+{
+ return -ENOENT;
+}
diff --git a/lib/librte_eal/common/arch/x86/rte_cpuflags.c b/lib/librte_eal/common/arch/x86/rte_cpuflags.c
new file mode 100644
index 00000000..01382571
--- /dev/null
+++ b/lib/librte_eal/common/arch/x86/rte_cpuflags.c
@@ -0,0 +1,220 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "rte_cpuflags.h"
+
+#include <stdio.h>
+#include <errno.h>
+#include <stdint.h>
+
+enum cpu_register_t {
+ RTE_REG_EAX = 0,
+ RTE_REG_EBX,
+ RTE_REG_ECX,
+ RTE_REG_EDX,
+};
+
+typedef uint32_t cpuid_registers_t[4];
+
+/**
+ * Struct to hold a processor feature entry
+ */
+struct feature_entry {
+ uint32_t leaf; /**< cpuid leaf */
+ uint32_t subleaf; /**< cpuid subleaf */
+ uint32_t reg; /**< cpuid register */
+ uint32_t bit; /**< cpuid register bit */
+#define CPU_FLAG_NAME_MAX_LEN 64
+ char name[CPU_FLAG_NAME_MAX_LEN]; /**< String for printing */
+};
+
+#define FEAT_DEF(name, leaf, subleaf, reg, bit) \
+ [RTE_CPUFLAG_##name] = {leaf, subleaf, reg, bit, #name },
+
+const struct feature_entry rte_cpu_feature_table[] = {
+ FEAT_DEF(SSE3, 0x00000001, 0, RTE_REG_ECX, 0)
+ FEAT_DEF(PCLMULQDQ, 0x00000001, 0, RTE_REG_ECX, 1)
+ FEAT_DEF(DTES64, 0x00000001, 0, RTE_REG_ECX, 2)
+ FEAT_DEF(MONITOR, 0x00000001, 0, RTE_REG_ECX, 3)
+ FEAT_DEF(DS_CPL, 0x00000001, 0, RTE_REG_ECX, 4)
+ FEAT_DEF(VMX, 0x00000001, 0, RTE_REG_ECX, 5)
+ FEAT_DEF(SMX, 0x00000001, 0, RTE_REG_ECX, 6)
+ FEAT_DEF(EIST, 0x00000001, 0, RTE_REG_ECX, 7)
+ FEAT_DEF(TM2, 0x00000001, 0, RTE_REG_ECX, 8)
+ FEAT_DEF(SSSE3, 0x00000001, 0, RTE_REG_ECX, 9)
+ FEAT_DEF(CNXT_ID, 0x00000001, 0, RTE_REG_ECX, 10)
+ FEAT_DEF(FMA, 0x00000001, 0, RTE_REG_ECX, 12)
+ FEAT_DEF(CMPXCHG16B, 0x00000001, 0, RTE_REG_ECX, 13)
+ FEAT_DEF(XTPR, 0x00000001, 0, RTE_REG_ECX, 14)
+ FEAT_DEF(PDCM, 0x00000001, 0, RTE_REG_ECX, 15)
+ FEAT_DEF(PCID, 0x00000001, 0, RTE_REG_ECX, 17)
+ FEAT_DEF(DCA, 0x00000001, 0, RTE_REG_ECX, 18)
+ FEAT_DEF(SSE4_1, 0x00000001, 0, RTE_REG_ECX, 19)
+ FEAT_DEF(SSE4_2, 0x00000001, 0, RTE_REG_ECX, 20)
+ FEAT_DEF(X2APIC, 0x00000001, 0, RTE_REG_ECX, 21)
+ FEAT_DEF(MOVBE, 0x00000001, 0, RTE_REG_ECX, 22)
+ FEAT_DEF(POPCNT, 0x00000001, 0, RTE_REG_ECX, 23)
+ FEAT_DEF(TSC_DEADLINE, 0x00000001, 0, RTE_REG_ECX, 24)
+ FEAT_DEF(AES, 0x00000001, 0, RTE_REG_ECX, 25)
+ FEAT_DEF(XSAVE, 0x00000001, 0, RTE_REG_ECX, 26)
+ FEAT_DEF(OSXSAVE, 0x00000001, 0, RTE_REG_ECX, 27)
+ FEAT_DEF(AVX, 0x00000001, 0, RTE_REG_ECX, 28)
+ FEAT_DEF(F16C, 0x00000001, 0, RTE_REG_ECX, 29)
+ FEAT_DEF(RDRAND, 0x00000001, 0, RTE_REG_ECX, 30)
+
+ FEAT_DEF(FPU, 0x00000001, 0, RTE_REG_EDX, 0)
+ FEAT_DEF(VME, 0x00000001, 0, RTE_REG_EDX, 1)
+ FEAT_DEF(DE, 0x00000001, 0, RTE_REG_EDX, 2)
+ FEAT_DEF(PSE, 0x00000001, 0, RTE_REG_EDX, 3)
+ FEAT_DEF(TSC, 0x00000001, 0, RTE_REG_EDX, 4)
+ FEAT_DEF(MSR, 0x00000001, 0, RTE_REG_EDX, 5)
+ FEAT_DEF(PAE, 0x00000001, 0, RTE_REG_EDX, 6)
+ FEAT_DEF(MCE, 0x00000001, 0, RTE_REG_EDX, 7)
+ FEAT_DEF(CX8, 0x00000001, 0, RTE_REG_EDX, 8)
+ FEAT_DEF(APIC, 0x00000001, 0, RTE_REG_EDX, 9)
+ FEAT_DEF(SEP, 0x00000001, 0, RTE_REG_EDX, 11)
+ FEAT_DEF(MTRR, 0x00000001, 0, RTE_REG_EDX, 12)
+ FEAT_DEF(PGE, 0x00000001, 0, RTE_REG_EDX, 13)
+ FEAT_DEF(MCA, 0x00000001, 0, RTE_REG_EDX, 14)
+ FEAT_DEF(CMOV, 0x00000001, 0, RTE_REG_EDX, 15)
+ FEAT_DEF(PAT, 0x00000001, 0, RTE_REG_EDX, 16)
+ FEAT_DEF(PSE36, 0x00000001, 0, RTE_REG_EDX, 17)
+ FEAT_DEF(PSN, 0x00000001, 0, RTE_REG_EDX, 18)
+ FEAT_DEF(CLFSH, 0x00000001, 0, RTE_REG_EDX, 19)
+ FEAT_DEF(DS, 0x00000001, 0, RTE_REG_EDX, 21)
+ FEAT_DEF(ACPI, 0x00000001, 0, RTE_REG_EDX, 22)
+ FEAT_DEF(MMX, 0x00000001, 0, RTE_REG_EDX, 23)
+ FEAT_DEF(FXSR, 0x00000001, 0, RTE_REG_EDX, 24)
+ FEAT_DEF(SSE, 0x00000001, 0, RTE_REG_EDX, 25)
+ FEAT_DEF(SSE2, 0x00000001, 0, RTE_REG_EDX, 26)
+ FEAT_DEF(SS, 0x00000001, 0, RTE_REG_EDX, 27)
+ FEAT_DEF(HTT, 0x00000001, 0, RTE_REG_EDX, 28)
+ FEAT_DEF(TM, 0x00000001, 0, RTE_REG_EDX, 29)
+ FEAT_DEF(PBE, 0x00000001, 0, RTE_REG_EDX, 31)
+
+ FEAT_DEF(DIGTEMP, 0x00000006, 0, RTE_REG_EAX, 0)
+ FEAT_DEF(TRBOBST, 0x00000006, 0, RTE_REG_EAX, 1)
+ FEAT_DEF(ARAT, 0x00000006, 0, RTE_REG_EAX, 2)
+ FEAT_DEF(PLN, 0x00000006, 0, RTE_REG_EAX, 4)
+ FEAT_DEF(ECMD, 0x00000006, 0, RTE_REG_EAX, 5)
+ FEAT_DEF(PTM, 0x00000006, 0, RTE_REG_EAX, 6)
+
+ FEAT_DEF(MPERF_APERF_MSR, 0x00000006, 0, RTE_REG_ECX, 0)
+ FEAT_DEF(ACNT2, 0x00000006, 0, RTE_REG_ECX, 1)
+ FEAT_DEF(ENERGY_EFF, 0x00000006, 0, RTE_REG_ECX, 3)
+
+ FEAT_DEF(FSGSBASE, 0x00000007, 0, RTE_REG_EBX, 0)
+ FEAT_DEF(BMI1, 0x00000007, 0, RTE_REG_EBX, 2)
+ FEAT_DEF(HLE, 0x00000007, 0, RTE_REG_EBX, 4)
+ FEAT_DEF(AVX2, 0x00000007, 0, RTE_REG_EBX, 5)
+ FEAT_DEF(SMEP, 0x00000007, 0, RTE_REG_EBX, 6)
+ FEAT_DEF(BMI2, 0x00000007, 0, RTE_REG_EBX, 7)
+ FEAT_DEF(ERMS, 0x00000007, 0, RTE_REG_EBX, 8)
+ FEAT_DEF(INVPCID, 0x00000007, 0, RTE_REG_EBX, 10)
+ FEAT_DEF(RTM, 0x00000007, 0, RTE_REG_EBX, 11)
+ FEAT_DEF(AVX512F, 0x00000007, 0, RTE_REG_EBX, 16)
+
+ FEAT_DEF(LAHF_SAHF, 0x80000001, 0, RTE_REG_ECX, 0)
+ FEAT_DEF(LZCNT, 0x80000001, 0, RTE_REG_ECX, 4)
+
+ FEAT_DEF(SYSCALL, 0x80000001, 0, RTE_REG_EDX, 11)
+ FEAT_DEF(XD, 0x80000001, 0, RTE_REG_EDX, 20)
+ FEAT_DEF(1GB_PG, 0x80000001, 0, RTE_REG_EDX, 26)
+ FEAT_DEF(RDTSCP, 0x80000001, 0, RTE_REG_EDX, 27)
+ FEAT_DEF(EM64T, 0x80000001, 0, RTE_REG_EDX, 29)
+
+ FEAT_DEF(INVTSC, 0x80000007, 0, RTE_REG_EDX, 8)
+};
+
+/*
+ * Execute CPUID instruction and get contents of a specific register
+ *
+ * This function, when compiled with GCC, will generate architecture-neutral
+ * code, as per GCC manual.
+ */
+static void
+rte_cpu_get_features(uint32_t leaf, uint32_t subleaf, cpuid_registers_t out)
+{
+#if defined(__i386__) && defined(__PIC__)
+ /* %ebx is a forbidden register if we compile with -fPIC or -fPIE */
+ asm volatile("movl %%ebx,%0 ; cpuid ; xchgl %%ebx,%0"
+ : "=r" (out[RTE_REG_EBX]),
+ "=a" (out[RTE_REG_EAX]),
+ "=c" (out[RTE_REG_ECX]),
+ "=d" (out[RTE_REG_EDX])
+ : "a" (leaf), "c" (subleaf));
+#else
+ asm volatile("cpuid"
+ : "=a" (out[RTE_REG_EAX]),
+ "=b" (out[RTE_REG_EBX]),
+ "=c" (out[RTE_REG_ECX]),
+ "=d" (out[RTE_REG_EDX])
+ : "a" (leaf), "c" (subleaf));
+#endif
+}
+
+int
+rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature)
+{
+ const struct feature_entry *feat;
+ cpuid_registers_t regs;
+
+ if (feature >= RTE_CPUFLAG_NUMFLAGS)
+ /* Flag does not match anything in the feature tables */
+ return -ENOENT;
+
+ feat = &rte_cpu_feature_table[feature];
+
+ if (!feat->leaf)
+ /* This entry in the table wasn't filled out! */
+ return -EFAULT;
+
+ rte_cpu_get_features(feat->leaf & 0xffff0000, 0, regs);
+ if (((regs[RTE_REG_EAX] ^ feat->leaf) & 0xffff0000) ||
+ regs[RTE_REG_EAX] < feat->leaf)
+ return 0;
+
+ /* get the cpuid leaf containing the desired feature */
+ rte_cpu_get_features(feat->leaf, feat->subleaf, regs);
+
+ /* check if the feature is enabled */
+ return (regs[feat->reg] >> feat->bit) & 1;
+}
+
+const char *
+rte_cpu_get_flag_name(enum rte_cpu_flag_t feature)
+{
+ if (feature >= RTE_CPUFLAG_NUMFLAGS)
+ return NULL;
+ return rte_cpu_feature_table[feature].name;
+}
diff --git a/lib/librte_eal/common/eal_common_cpuflags.c b/lib/librte_eal/common/eal_common_cpuflags.c
new file mode 100644
index 00000000..ecb12409
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_cpuflags.c
@@ -0,0 +1,76 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+
+#include <rte_common.h>
+#include <rte_cpuflags.h>
+
+/**
+ * Checks if the machine is adequate for running the binary. If it is not, the
+ * program exits with status 1.
+ * The function attribute forces this function to be called before main(). But
+ * with ICC, the check is generated by the compiler.
+ */
+#ifndef __INTEL_COMPILER
+void __attribute__ ((__constructor__))
+#else
+void
+#endif
+rte_cpu_check_supported(void)
+{
+ /* This is generated at compile-time by the build system */
+ static const enum rte_cpu_flag_t compile_time_flags[] = {
+ RTE_COMPILE_TIME_CPUFLAGS
+ };
+ unsigned count = RTE_DIM(compile_time_flags), i;
+ int ret;
+
+ for (i = 0; i < count; i++) {
+ ret = rte_cpu_get_flag_enabled(compile_time_flags[i]);
+
+ if (ret < 0) {
+ fprintf(stderr,
+ "ERROR: CPU feature flag lookup failed with error %d\n",
+ ret);
+ exit(1);
+ }
+ if (!ret) {
+ fprintf(stderr,
+ "ERROR: This system does not support \"%s\".\n"
+ "Please check that RTE_MACHINE is set correctly.\n",
+ rte_cpu_get_flag_name(compile_time_flags[i]));
+ exit(1);
+ }
+ }
+}
diff --git a/lib/librte_eal/common/eal_common_dev.c b/lib/librte_eal/common/eal_common_dev.c
new file mode 100644
index 00000000..a8a4146c
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_dev.c
@@ -0,0 +1,152 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2014 6WIND S.A.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <sys/queue.h>
+
+#include <rte_dev.h>
+#include <rte_devargs.h>
+#include <rte_debug.h>
+#include <rte_devargs.h>
+#include <rte_log.h>
+
+#include "eal_private.h"
+
+/** Global list of device drivers. */
+static struct rte_driver_list dev_driver_list =
+ TAILQ_HEAD_INITIALIZER(dev_driver_list);
+
+/* register a driver */
+void
+rte_eal_driver_register(struct rte_driver *driver)
+{
+ TAILQ_INSERT_TAIL(&dev_driver_list, driver, next);
+}
+
+/* unregister a driver */
+void
+rte_eal_driver_unregister(struct rte_driver *driver)
+{
+ TAILQ_REMOVE(&dev_driver_list, driver, next);
+}
+
+int
+rte_eal_vdev_init(const char *name, const char *args)
+{
+ struct rte_driver *driver;
+
+ if (name == NULL)
+ return -EINVAL;
+
+ TAILQ_FOREACH(driver, &dev_driver_list, next) {
+ if (driver->type != PMD_VDEV)
+ continue;
+
+ /*
+ * search a driver prefix in virtual device name.
+ * For example, if the driver is pcap PMD, driver->name
+ * will be "eth_pcap", but "name" will be "eth_pcapN".
+ * So use strncmp to compare.
+ */
+ if (!strncmp(driver->name, name, strlen(driver->name)))
+ return driver->init(name, args);
+ }
+
+ RTE_LOG(ERR, EAL, "no driver found for %s\n", name);
+ return -EINVAL;
+}
+
+int
+rte_eal_dev_init(void)
+{
+ struct rte_devargs *devargs;
+ struct rte_driver *driver;
+
+ /*
+ * Note that the dev_driver_list is populated here
+ * from calls made to rte_eal_driver_register from constructor functions
+ * embedded into PMD modules via the PMD_REGISTER_DRIVER macro
+ */
+
+ /* call the init function for each virtual device */
+ TAILQ_FOREACH(devargs, &devargs_list, next) {
+
+ if (devargs->type != RTE_DEVTYPE_VIRTUAL)
+ continue;
+
+ if (rte_eal_vdev_init(devargs->virt.drv_name,
+ devargs->args)) {
+ RTE_LOG(ERR, EAL, "failed to initialize %s device\n",
+ devargs->virt.drv_name);
+ return -1;
+ }
+ }
+
+ /* Once the vdevs are initalized, start calling all the pdev drivers */
+ TAILQ_FOREACH(driver, &dev_driver_list, next) {
+ if (driver->type != PMD_PDEV)
+ continue;
+ /* PDEV drivers don't get passed any parameters */
+ driver->init(NULL, NULL);
+ }
+ return 0;
+}
+
+int
+rte_eal_vdev_uninit(const char *name)
+{
+ struct rte_driver *driver;
+
+ if (name == NULL)
+ return -EINVAL;
+
+ TAILQ_FOREACH(driver, &dev_driver_list, next) {
+ if (driver->type != PMD_VDEV)
+ continue;
+
+ /*
+ * search a driver prefix in virtual device name.
+ * For example, if the driver is pcap PMD, driver->name
+ * will be "eth_pcap", but "name" will be "eth_pcapN".
+ * So use strncmp to compare.
+ */
+ if (!strncmp(driver->name, name, strlen(driver->name)))
+ return driver->uninit(name);
+ }
+
+ RTE_LOG(ERR, EAL, "no driver found for %s\n", name);
+ return -EINVAL;
+}
diff --git a/lib/librte_eal/common/eal_common_devargs.c b/lib/librte_eal/common/eal_common_devargs.c
new file mode 100644
index 00000000..2bfe54a1
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_devargs.c
@@ -0,0 +1,176 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2014 6WIND S.A.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A nor the names of its contributors
+ * may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* This file manages the list of devices and their arguments, as given
+ * by the user at startup
+ *
+ * Code here should not call rte_log since the EAL environment
+ * may not be initialized.
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include <rte_pci.h>
+#include <rte_devargs.h>
+#include "eal_private.h"
+
+/** Global list of user devices */
+struct rte_devargs_list devargs_list =
+ TAILQ_HEAD_INITIALIZER(devargs_list);
+
+int
+rte_eal_parse_devargs_str(const char *devargs_str,
+ char **drvname, char **drvargs)
+{
+ char *sep;
+
+ if ((devargs_str) == NULL || (drvname) == NULL || (drvargs == NULL))
+ return -1;
+
+ *drvname = strdup(devargs_str);
+ if (drvname == NULL)
+ return -1;
+
+ /* set the first ',' to '\0' to split name and arguments */
+ sep = strchr(*drvname, ',');
+ if (sep != NULL) {
+ sep[0] = '\0';
+ *drvargs = strdup(sep + 1);
+ } else {
+ *drvargs = strdup("");
+ }
+
+ if (*drvargs == NULL) {
+ free(*drvname);
+ return -1;
+ }
+ return 0;
+}
+
+/* store a whitelist parameter for later parsing */
+int
+rte_eal_devargs_add(enum rte_devtype devtype, const char *devargs_str)
+{
+ struct rte_devargs *devargs = NULL;
+ char *buf = NULL;
+ int ret;
+
+ /* use malloc instead of rte_malloc as it's called early at init */
+ devargs = malloc(sizeof(*devargs));
+ if (devargs == NULL)
+ goto fail;
+
+ memset(devargs, 0, sizeof(*devargs));
+ devargs->type = devtype;
+
+ if (rte_eal_parse_devargs_str(devargs_str, &buf, &devargs->args))
+ goto fail;
+
+ switch (devargs->type) {
+ case RTE_DEVTYPE_WHITELISTED_PCI:
+ case RTE_DEVTYPE_BLACKLISTED_PCI:
+ /* try to parse pci identifier */
+ if (eal_parse_pci_BDF(buf, &devargs->pci.addr) != 0 &&
+ eal_parse_pci_DomBDF(buf, &devargs->pci.addr) != 0)
+ goto fail;
+
+ break;
+ case RTE_DEVTYPE_VIRTUAL:
+ /* save driver name */
+ ret = snprintf(devargs->virt.drv_name,
+ sizeof(devargs->virt.drv_name), "%s", buf);
+ if (ret < 0 || ret >= (int)sizeof(devargs->virt.drv_name))
+ goto fail;
+
+ break;
+ }
+
+ free(buf);
+ TAILQ_INSERT_TAIL(&devargs_list, devargs, next);
+ return 0;
+
+fail:
+ free(buf);
+ if (devargs) {
+ free(devargs->args);
+ free(devargs);
+ }
+
+ return -1;
+}
+
+/* count the number of devices of a specified type */
+unsigned int
+rte_eal_devargs_type_count(enum rte_devtype devtype)
+{
+ struct rte_devargs *devargs;
+ unsigned int count = 0;
+
+ TAILQ_FOREACH(devargs, &devargs_list, next) {
+ if (devargs->type != devtype)
+ continue;
+ count++;
+ }
+ return count;
+}
+
+/* dump the user devices on the console */
+void
+rte_eal_devargs_dump(FILE *f)
+{
+ struct rte_devargs *devargs;
+
+ fprintf(f, "User device white list:\n");
+ TAILQ_FOREACH(devargs, &devargs_list, next) {
+ if (devargs->type == RTE_DEVTYPE_WHITELISTED_PCI)
+ fprintf(f, " PCI whitelist " PCI_PRI_FMT " %s\n",
+ devargs->pci.addr.domain,
+ devargs->pci.addr.bus,
+ devargs->pci.addr.devid,
+ devargs->pci.addr.function,
+ devargs->args);
+ else if (devargs->type == RTE_DEVTYPE_BLACKLISTED_PCI)
+ fprintf(f, " PCI blacklist " PCI_PRI_FMT " %s\n",
+ devargs->pci.addr.domain,
+ devargs->pci.addr.bus,
+ devargs->pci.addr.devid,
+ devargs->pci.addr.function,
+ devargs->args);
+ else if (devargs->type == RTE_DEVTYPE_VIRTUAL)
+ fprintf(f, " VIRTUAL %s %s\n",
+ devargs->virt.drv_name,
+ devargs->args);
+ else
+ fprintf(f, " UNKNOWN %s\n", devargs->args);
+ }
+}
diff --git a/lib/librte_eal/common/eal_common_errno.c b/lib/librte_eal/common/eal_common_errno.c
new file mode 100644
index 00000000..de48d8e4
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_errno.c
@@ -0,0 +1,72 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdarg.h>
+#include <errno.h>
+
+#include <rte_per_lcore.h>
+#include <rte_errno.h>
+#include <rte_string_fns.h>
+
+RTE_DEFINE_PER_LCORE(int, _rte_errno);
+
+const char *
+rte_strerror(int errnum)
+{
+#define RETVAL_SZ 256
+ static RTE_DEFINE_PER_LCORE(char[RETVAL_SZ], retval);
+
+ /* since some implementations of strerror_r throw an error
+ * themselves if errnum is too big, we handle that case here */
+ if (errnum > RTE_MAX_ERRNO)
+ snprintf(RTE_PER_LCORE(retval), RETVAL_SZ,
+#ifdef RTE_EXEC_ENV_BSDAPP
+ "Unknown error: %d", errnum);
+#else
+ "Unknown error %d", errnum);
+#endif
+ else
+ switch (errnum){
+ case E_RTE_SECONDARY:
+ return "Invalid call in secondary process";
+ case E_RTE_NO_CONFIG:
+ return "Missing rte_config structure";
+ default:
+ strerror_r(errnum, RTE_PER_LCORE(retval), RETVAL_SZ);
+ }
+
+ return RTE_PER_LCORE(retval);
+}
diff --git a/lib/librte_eal/common/eal_common_hexdump.c b/lib/librte_eal/common/eal_common_hexdump.c
new file mode 100644
index 00000000..d5cbd703
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_hexdump.c
@@ -0,0 +1,120 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <stdint.h>
+#include <rte_hexdump.h>
+#include <rte_string_fns.h>
+
+#define LINE_LEN 128
+
+/**************************************************************************//**
+*
+* rte_hexdump - Dump out memory in a special hex dump format.
+*
+* DESCRIPTION
+* Dump out the message buffer in a special hex dump output format with characters
+* printed for each line of 16 hex values.
+*
+* RETURNS: N/A
+*
+* SEE ALSO:
+*/
+
+void
+rte_hexdump(FILE *f, const char * title, const void * buf, unsigned int len)
+{
+ unsigned int i, out, ofs;
+ const unsigned char *data = buf;
+ char line[LINE_LEN]; /* space needed 8+16*3+3+16 == 75 */
+
+ fprintf(f, "%s at [%p], len=%u\n", (title)? title : " Dump data", data, len);
+ ofs = 0;
+ while (ofs < len) {
+ /* format the line in the buffer, then use printf to output to screen */
+ out = snprintf(line, LINE_LEN, "%08X:", ofs);
+ for (i = 0; ((ofs + i) < len) && (i < 16); i++)
+ out += snprintf(line+out, LINE_LEN - out, " %02X", (data[ofs+i] & 0xff));
+ for(; i <= 16; i++)
+ out += snprintf(line+out, LINE_LEN - out, " | ");
+ for(i = 0; (ofs < len) && (i < 16); i++, ofs++) {
+ unsigned char c = data[ofs];
+ if ( (c < ' ') || (c > '~'))
+ c = '.';
+ out += snprintf(line+out, LINE_LEN - out, "%c", c);
+ }
+ fprintf(f, "%s\n", line);
+ }
+ fflush(f);
+}
+
+/**************************************************************************//**
+*
+* rte_memdump - Dump out memory in hex bytes with colons.
+*
+* DESCRIPTION
+* Dump out the message buffer in hex bytes with colons xx:xx:xx:xx:...
+*
+* RETURNS: N/A
+*
+* SEE ALSO:
+*/
+
+void
+rte_memdump(FILE *f, const char * title, const void * buf, unsigned int len)
+{
+ unsigned int i, out;
+ const unsigned char *data = buf;
+ char line[LINE_LEN];
+
+ if ( title )
+ fprintf(f, "%s: ", title);
+
+ line[0] = '\0';
+ for (i = 0, out = 0; i < len; i++) {
+ // Make sure we do not overrun the line buffer length.
+ if ( out >= (LINE_LEN - 4) ) {
+ fprintf(f, "%s", line);
+ out = 0;
+ line[out] = '\0';
+ }
+ out += snprintf(line+out, LINE_LEN - out, "%02x%s",
+ (data[i] & 0xff), ((i+1) < len)? ":" : "");
+ }
+ if ( out > 0 )
+ fprintf(f, "%s", line);
+ fprintf(f, "\n");
+
+ fflush(f);
+}
diff --git a/lib/librte_eal/common/eal_common_launch.c b/lib/librte_eal/common/eal_common_launch.c
new file mode 100644
index 00000000..229c3a03
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_launch.c
@@ -0,0 +1,118 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <errno.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <sys/queue.h>
+
+#include <rte_launch.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_eal.h>
+#include <rte_atomic.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+
+/*
+ * Wait until a lcore finished its job.
+ */
+int
+rte_eal_wait_lcore(unsigned slave_id)
+{
+ if (lcore_config[slave_id].state == WAIT)
+ return 0;
+
+ while (lcore_config[slave_id].state != WAIT &&
+ lcore_config[slave_id].state != FINISHED);
+
+ rte_rmb();
+
+ /* we are in finished state, go to wait state */
+ lcore_config[slave_id].state = WAIT;
+ return lcore_config[slave_id].ret;
+}
+
+/*
+ * Check that every SLAVE lcores are in WAIT state, then call
+ * rte_eal_remote_launch() for all of them. If call_master is true
+ * (set to CALL_MASTER), also call the function on the master lcore.
+ */
+int
+rte_eal_mp_remote_launch(int (*f)(void *), void *arg,
+ enum rte_rmt_call_master_t call_master)
+{
+ int lcore_id;
+ int master = rte_get_master_lcore();
+
+ /* check state of lcores */
+ RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+ if (lcore_config[lcore_id].state != WAIT)
+ return -EBUSY;
+ }
+
+ /* send messages to cores */
+ RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+ rte_eal_remote_launch(f, arg, lcore_id);
+ }
+
+ if (call_master == CALL_MASTER) {
+ lcore_config[master].ret = f(arg);
+ lcore_config[master].state = FINISHED;
+ }
+
+ return 0;
+}
+
+/*
+ * Return the state of the lcore identified by slave_id.
+ */
+enum rte_lcore_state_t
+rte_eal_get_lcore_state(unsigned lcore_id)
+{
+ return lcore_config[lcore_id].state;
+}
+
+/*
+ * Do a rte_eal_wait_lcore() for every lcore. The return values are
+ * ignored.
+ */
+void
+rte_eal_mp_wait_lcore(void)
+{
+ unsigned lcore_id;
+
+ RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+ rte_eal_wait_lcore(lcore_id);
+ }
+}
diff --git a/lib/librte_eal/common/eal_common_lcore.c b/lib/librte_eal/common/eal_common_lcore.c
new file mode 100644
index 00000000..a4263ba5
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_lcore.c
@@ -0,0 +1,110 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <unistd.h>
+#include <limits.h>
+#include <string.h>
+#include <dirent.h>
+
+#include <rte_log.h>
+#include <rte_eal.h>
+#include <rte_lcore.h>
+#include <rte_common.h>
+#include <rte_debug.h>
+
+#include "eal_private.h"
+#include "eal_thread.h"
+
+/*
+ * Parse /sys/devices/system/cpu to get the number of physical and logical
+ * processors on the machine. The function will fill the cpu_info
+ * structure.
+ */
+int
+rte_eal_cpu_init(void)
+{
+ /* pointer to global configuration */
+ struct rte_config *config = rte_eal_get_configuration();
+ unsigned lcore_id;
+ unsigned count = 0;
+
+ /*
+ * Parse the maximum set of logical cores, detect the subset of running
+ * ones and enable them by default.
+ */
+ for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+ lcore_config[lcore_id].core_index = count;
+
+ /* init cpuset for per lcore config */
+ CPU_ZERO(&lcore_config[lcore_id].cpuset);
+
+ /* in 1:1 mapping, record related cpu detected state */
+ lcore_config[lcore_id].detected = eal_cpu_detected(lcore_id);
+ if (lcore_config[lcore_id].detected == 0) {
+ config->lcore_role[lcore_id] = ROLE_OFF;
+ lcore_config[lcore_id].core_index = -1;
+ continue;
+ }
+
+ /* By default, lcore 1:1 map to cpu id */
+ CPU_SET(lcore_id, &lcore_config[lcore_id].cpuset);
+
+ /* By default, each detected core is enabled */
+ config->lcore_role[lcore_id] = ROLE_RTE;
+ lcore_config[lcore_id].core_id = eal_cpu_core_id(lcore_id);
+ lcore_config[lcore_id].socket_id = eal_cpu_socket_id(lcore_id);
+ if (lcore_config[lcore_id].socket_id >= RTE_MAX_NUMA_NODES)
+#ifdef RTE_EAL_ALLOW_INV_SOCKET_ID
+ lcore_config[lcore_id].socket_id = 0;
+#else
+ rte_panic("Socket ID (%u) is greater than "
+ "RTE_MAX_NUMA_NODES (%d)\n",
+ lcore_config[lcore_id].socket_id,
+ RTE_MAX_NUMA_NODES);
+#endif
+
+ RTE_LOG(DEBUG, EAL, "Detected lcore %u as "
+ "core %u on socket %u\n",
+ lcore_id, lcore_config[lcore_id].core_id,
+ lcore_config[lcore_id].socket_id);
+ count++;
+ }
+ /* Set the count of enabled logical cores of the EAL configuration */
+ config->lcore_count = count;
+ RTE_LOG(DEBUG, EAL,
+ "Support maximum %u logical core(s) by configuration.\n",
+ RTE_MAX_LCORE);
+ RTE_LOG(DEBUG, EAL, "Detected %u lcore(s)\n", config->lcore_count);
+
+ return 0;
+}
diff --git a/lib/librte_eal/common/eal_common_log.c b/lib/librte_eal/common/eal_common_log.c
new file mode 100644
index 00000000..1ae8de70
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_log.c
@@ -0,0 +1,337 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdarg.h>
+#include <sys/types.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/queue.h>
+
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_launch.h>
+#include <rte_common.h>
+#include <rte_cycles.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_atomic.h>
+#include <rte_debug.h>
+#include <rte_spinlock.h>
+#include <rte_branch_prediction.h>
+#include <rte_ring.h>
+#include <rte_mempool.h>
+
+#include "eal_private.h"
+
+#define LOG_ELT_SIZE 2048
+
+#define LOG_HISTORY_MP_NAME "log_history"
+
+STAILQ_HEAD(log_history_list, log_history);
+
+/**
+ * The structure of a message log in the log history.
+ */
+struct log_history {
+ STAILQ_ENTRY(log_history) next;
+ unsigned size;
+ char buf[0];
+};
+
+static struct rte_mempool *log_history_mp = NULL;
+static unsigned log_history_size = 0;
+static struct log_history_list log_history;
+
+/* global log structure */
+struct rte_logs rte_logs = {
+ .type = ~0,
+ .level = RTE_LOG_DEBUG,
+ .file = NULL,
+};
+
+static rte_spinlock_t log_dump_lock = RTE_SPINLOCK_INITIALIZER;
+static rte_spinlock_t log_list_lock = RTE_SPINLOCK_INITIALIZER;
+static FILE *default_log_stream;
+static int history_enabled = 1;
+
+/**
+ * This global structure stores some informations about the message
+ * that is currently beeing processed by one lcore
+ */
+struct log_cur_msg {
+ uint32_t loglevel; /**< log level - see rte_log.h */
+ uint32_t logtype; /**< log type - see rte_log.h */
+} __rte_cache_aligned;
+static struct log_cur_msg log_cur_msg[RTE_MAX_LCORE]; /**< per core log */
+
+
+/* default logs */
+
+int
+rte_log_add_in_history(const char *buf, size_t size)
+{
+ struct log_history *hist_buf = NULL;
+ static const unsigned hist_buf_size = LOG_ELT_SIZE - sizeof(*hist_buf);
+ void *obj;
+
+ if (history_enabled == 0)
+ return 0;
+
+ rte_spinlock_lock(&log_list_lock);
+
+ /* get a buffer for adding in history */
+ if (log_history_size > RTE_LOG_HISTORY) {
+ hist_buf = STAILQ_FIRST(&log_history);
+ if (hist_buf) {
+ STAILQ_REMOVE_HEAD(&log_history, next);
+ log_history_size--;
+ }
+ }
+ else {
+ if (rte_mempool_mc_get(log_history_mp, &obj) < 0)
+ obj = NULL;
+ hist_buf = obj;
+ }
+
+ /* no buffer */
+ if (hist_buf == NULL) {
+ rte_spinlock_unlock(&log_list_lock);
+ return -ENOBUFS;
+ }
+
+ /* not enough room for msg, buffer go back in mempool */
+ if (size >= hist_buf_size) {
+ rte_mempool_mp_put(log_history_mp, hist_buf);
+ rte_spinlock_unlock(&log_list_lock);
+ return -ENOBUFS;
+ }
+
+ /* add in history */
+ memcpy(hist_buf->buf, buf, size);
+ hist_buf->buf[size] = hist_buf->buf[hist_buf_size-1] = '\0';
+ hist_buf->size = size;
+ STAILQ_INSERT_TAIL(&log_history, hist_buf, next);
+ log_history_size++;
+ rte_spinlock_unlock(&log_list_lock);
+
+ return 0;
+}
+
+void
+rte_log_set_history(int enable)
+{
+ history_enabled = enable;
+}
+
+/* Change the stream that will be used by logging system */
+int
+rte_openlog_stream(FILE *f)
+{
+ if (f == NULL)
+ rte_logs.file = default_log_stream;
+ else
+ rte_logs.file = f;
+ return 0;
+}
+
+/* Set global log level */
+void
+rte_set_log_level(uint32_t level)
+{
+ rte_logs.level = (uint32_t)level;
+}
+
+/* Get global log level */
+uint32_t
+rte_get_log_level(void)
+{
+ return rte_logs.level;
+}
+
+/* Set global log type */
+void
+rte_set_log_type(uint32_t type, int enable)
+{
+ if (enable)
+ rte_logs.type |= type;
+ else
+ rte_logs.type &= (~type);
+}
+
+/* Get global log type */
+uint32_t
+rte_get_log_type(void)
+{
+ return rte_logs.type;
+}
+
+/* get the current loglevel for the message beeing processed */
+int rte_log_cur_msg_loglevel(void)
+{
+ unsigned lcore_id;
+ lcore_id = rte_lcore_id();
+ if (lcore_id >= RTE_MAX_LCORE)
+ return rte_get_log_level();
+ return log_cur_msg[lcore_id].loglevel;
+}
+
+/* get the current logtype for the message beeing processed */
+int rte_log_cur_msg_logtype(void)
+{
+ unsigned lcore_id;
+ lcore_id = rte_lcore_id();
+ if (lcore_id >= RTE_MAX_LCORE)
+ return rte_get_log_type();
+ return log_cur_msg[lcore_id].logtype;
+}
+
+/* Dump log history to file */
+void
+rte_log_dump_history(FILE *out)
+{
+ struct log_history_list tmp_log_history;
+ struct log_history *hist_buf;
+ unsigned i;
+
+ /* only one dump at a time */
+ rte_spinlock_lock(&log_dump_lock);
+
+ /* save list, and re-init to allow logging during dump */
+ rte_spinlock_lock(&log_list_lock);
+ tmp_log_history = log_history;
+ STAILQ_INIT(&log_history);
+ log_history_size = 0;
+ rte_spinlock_unlock(&log_list_lock);
+
+ for (i=0; i<RTE_LOG_HISTORY; i++) {
+
+ /* remove one message from history list */
+ hist_buf = STAILQ_FIRST(&tmp_log_history);
+
+ if (hist_buf == NULL)
+ break;
+
+ STAILQ_REMOVE_HEAD(&tmp_log_history, next);
+
+ /* write on stdout */
+ if (fwrite(hist_buf->buf, hist_buf->size, 1, out) == 0) {
+ rte_mempool_mp_put(log_history_mp, hist_buf);
+ break;
+ }
+
+ /* put back message structure in pool */
+ rte_mempool_mp_put(log_history_mp, hist_buf);
+ }
+ fflush(out);
+
+ rte_spinlock_unlock(&log_dump_lock);
+}
+
+/*
+ * Generates a log message The message will be sent in the stream
+ * defined by the previous call to rte_openlog_stream().
+ */
+int
+rte_vlog(uint32_t level, uint32_t logtype, const char *format, va_list ap)
+{
+ int ret;
+ FILE *f = rte_logs.file;
+ unsigned lcore_id;
+
+ if ((level > rte_logs.level) || !(logtype & rte_logs.type))
+ return 0;
+
+ /* save loglevel and logtype in a global per-lcore variable */
+ lcore_id = rte_lcore_id();
+ if (lcore_id < RTE_MAX_LCORE) {
+ log_cur_msg[lcore_id].loglevel = level;
+ log_cur_msg[lcore_id].logtype = logtype;
+ }
+
+ ret = vfprintf(f, format, ap);
+ fflush(f);
+ return ret;
+}
+
+/*
+ * Generates a log message The message will be sent in the stream
+ * defined by the previous call to rte_openlog_stream().
+ * No need to check level here, done by rte_vlog().
+ */
+int
+rte_log(uint32_t level, uint32_t logtype, const char *format, ...)
+{
+ va_list ap;
+ int ret;
+
+ va_start(ap, format);
+ ret = rte_vlog(level, logtype, format, ap);
+ va_end(ap);
+ return ret;
+}
+
+/*
+ * called by environment-specific log init function to initialize log
+ * history
+ */
+int
+rte_eal_common_log_init(FILE *default_log)
+{
+ STAILQ_INIT(&log_history);
+
+ /* reserve RTE_LOG_HISTORY*2 elements, so we can dump and
+ * keep logging during this time */
+ log_history_mp = rte_mempool_create(LOG_HISTORY_MP_NAME, RTE_LOG_HISTORY*2,
+ LOG_ELT_SIZE, 0, 0,
+ NULL, NULL,
+ NULL, NULL,
+ SOCKET_ID_ANY, 0);
+
+ if ((log_history_mp == NULL) &&
+ ((log_history_mp = rte_mempool_lookup(LOG_HISTORY_MP_NAME)) == NULL)){
+ RTE_LOG(ERR, EAL, "%s(): cannot create log_history mempool\n",
+ __func__);
+ return -1;
+ }
+
+ default_log_stream = default_log;
+ rte_openlog_stream(default_log);
+ return 0;
+}
diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c
new file mode 100644
index 00000000..6155752e
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_memory.c
@@ -0,0 +1,154 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <inttypes.h>
+#include <sys/queue.h>
+
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_log.h>
+
+#include "eal_private.h"
+#include "eal_internal_cfg.h"
+
+/*
+ * Return a pointer to a read-only table of struct rte_physmem_desc
+ * elements, containing the layout of all addressable physical
+ * memory. The last element of the table contains a NULL address.
+ */
+const struct rte_memseg *
+rte_eal_get_physmem_layout(void)
+{
+ return rte_eal_get_configuration()->mem_config->memseg;
+}
+
+
+/* get the total size of memory */
+uint64_t
+rte_eal_get_physmem_size(void)
+{
+ const struct rte_mem_config *mcfg;
+ unsigned i = 0;
+ uint64_t total_len = 0;
+
+ /* get pointer to global configuration */
+ mcfg = rte_eal_get_configuration()->mem_config;
+
+ for (i = 0; i < RTE_MAX_MEMSEG; i++) {
+ if (mcfg->memseg[i].addr == NULL)
+ break;
+
+ total_len += mcfg->memseg[i].len;
+ }
+
+ return total_len;
+}
+
+/* Dump the physical memory layout on console */
+void
+rte_dump_physmem_layout(FILE *f)
+{
+ const struct rte_mem_config *mcfg;
+ unsigned i = 0;
+
+ /* get pointer to global configuration */
+ mcfg = rte_eal_get_configuration()->mem_config;
+
+ for (i = 0; i < RTE_MAX_MEMSEG; i++) {
+ if (mcfg->memseg[i].addr == NULL)
+ break;
+
+ fprintf(f, "Segment %u: phys:0x%"PRIx64", len:%zu, "
+ "virt:%p, socket_id:%"PRId32", "
+ "hugepage_sz:%"PRIu64", nchannel:%"PRIx32", "
+ "nrank:%"PRIx32"\n", i,
+ mcfg->memseg[i].phys_addr,
+ mcfg->memseg[i].len,
+ mcfg->memseg[i].addr,
+ mcfg->memseg[i].socket_id,
+ mcfg->memseg[i].hugepage_sz,
+ mcfg->memseg[i].nchannel,
+ mcfg->memseg[i].nrank);
+ }
+}
+
+/* return the number of memory channels */
+unsigned rte_memory_get_nchannel(void)
+{
+ return rte_eal_get_configuration()->mem_config->nchannel;
+}
+
+/* return the number of memory rank */
+unsigned rte_memory_get_nrank(void)
+{
+ return rte_eal_get_configuration()->mem_config->nrank;
+}
+
+static int
+rte_eal_memdevice_init(void)
+{
+ struct rte_config *config;
+
+ if (rte_eal_process_type() == RTE_PROC_SECONDARY)
+ return 0;
+
+ config = rte_eal_get_configuration();
+ config->mem_config->nchannel = internal_config.force_nchannel;
+ config->mem_config->nrank = internal_config.force_nrank;
+
+ return 0;
+}
+
+/* init memory subsystem */
+int
+rte_eal_memory_init(void)
+{
+ RTE_LOG(DEBUG, EAL, "Setting up physically contiguous memory...\n");
+
+ const int retval = rte_eal_process_type() == RTE_PROC_PRIMARY ?
+ rte_eal_hugepage_init() :
+ rte_eal_hugepage_attach();
+ if (retval < 0)
+ return -1;
+
+ if (internal_config.no_shconf == 0 && rte_eal_memdevice_init() < 0)
+ return -1;
+
+ return 0;
+}
diff --git a/lib/librte_eal/common/eal_common_memzone.c b/lib/librte_eal/common/eal_common_memzone.c
new file mode 100644
index 00000000..711c8457
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_memzone.c
@@ -0,0 +1,445 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdarg.h>
+#include <inttypes.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/queue.h>
+
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_per_lcore.h>
+#include <rte_errno.h>
+#include <rte_string_fns.h>
+#include <rte_common.h>
+
+#include "malloc_heap.h"
+#include "malloc_elem.h"
+#include "eal_private.h"
+
+static inline const struct rte_memzone *
+memzone_lookup_thread_unsafe(const char *name)
+{
+ const struct rte_mem_config *mcfg;
+ const struct rte_memzone *mz;
+ unsigned i = 0;
+
+ /* get pointer to global configuration */
+ mcfg = rte_eal_get_configuration()->mem_config;
+
+ /*
+ * the algorithm is not optimal (linear), but there are few
+ * zones and this function should be called at init only
+ */
+ for (i = 0; i < RTE_MAX_MEMZONE; i++) {
+ mz = &mcfg->memzone[i];
+ if (mz->addr != NULL && !strncmp(name, mz->name, RTE_MEMZONE_NAMESIZE))
+ return &mcfg->memzone[i];
+ }
+
+ return NULL;
+}
+
+static inline struct rte_memzone *
+get_next_free_memzone(void)
+{
+ struct rte_mem_config *mcfg;
+ unsigned i = 0;
+
+ /* get pointer to global configuration */
+ mcfg = rte_eal_get_configuration()->mem_config;
+
+ for (i = 0; i < RTE_MAX_MEMZONE; i++) {
+ if (mcfg->memzone[i].addr == NULL)
+ return &mcfg->memzone[i];
+ }
+
+ return NULL;
+}
+
+/* This function will return the greatest free block if a heap has been
+ * specified. If no heap has been specified, it will return the heap and
+ * length of the greatest free block available in all heaps */
+static size_t
+find_heap_max_free_elem(int *s, unsigned align)
+{
+ struct rte_mem_config *mcfg;
+ struct rte_malloc_socket_stats stats;
+ int i, socket = *s;
+ size_t len = 0;
+
+ /* get pointer to global configuration */
+ mcfg = rte_eal_get_configuration()->mem_config;
+
+ for (i = 0; i < RTE_MAX_NUMA_NODES; i++) {
+ if ((socket != SOCKET_ID_ANY) && (socket != i))
+ continue;
+
+ malloc_heap_get_stats(&mcfg->malloc_heaps[i], &stats);
+ if (stats.greatest_free_size > len) {
+ len = stats.greatest_free_size;
+ *s = i;
+ }
+ }
+
+ return len - MALLOC_ELEM_OVERHEAD - align;
+}
+
+static const struct rte_memzone *
+memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
+ int socket_id, unsigned flags, unsigned align, unsigned bound)
+{
+ struct rte_mem_config *mcfg;
+ size_t requested_len;
+ int socket, i;
+
+ /* get pointer to global configuration */
+ mcfg = rte_eal_get_configuration()->mem_config;
+
+ /* no more room in config */
+ if (mcfg->memzone_cnt >= RTE_MAX_MEMZONE) {
+ RTE_LOG(ERR, EAL, "%s(): No more room in config\n", __func__);
+ rte_errno = ENOSPC;
+ return NULL;
+ }
+
+ /* zone already exist */
+ if ((memzone_lookup_thread_unsafe(name)) != NULL) {
+ RTE_LOG(DEBUG, EAL, "%s(): memzone <%s> already exists\n",
+ __func__, name);
+ rte_errno = EEXIST;
+ return NULL;
+ }
+
+ /* if alignment is not a power of two */
+ if (align && !rte_is_power_of_2(align)) {
+ RTE_LOG(ERR, EAL, "%s(): Invalid alignment: %u\n", __func__,
+ align);
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ /* alignment less than cache size is not allowed */
+ if (align < RTE_CACHE_LINE_SIZE)
+ align = RTE_CACHE_LINE_SIZE;
+
+ /* align length on cache boundary. Check for overflow before doing so */
+ if (len > SIZE_MAX - RTE_CACHE_LINE_MASK) {
+ rte_errno = EINVAL; /* requested size too big */
+ return NULL;
+ }
+
+ len += RTE_CACHE_LINE_MASK;
+ len &= ~((size_t) RTE_CACHE_LINE_MASK);
+
+ /* save minimal requested length */
+ requested_len = RTE_MAX((size_t)RTE_CACHE_LINE_SIZE, len);
+
+ /* check that boundary condition is valid */
+ if (bound != 0 && (requested_len > bound || !rte_is_power_of_2(bound))) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ if ((socket_id != SOCKET_ID_ANY) && (socket_id >= RTE_MAX_NUMA_NODES)) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ if (!rte_eal_has_hugepages())
+ socket_id = SOCKET_ID_ANY;
+
+ if (len == 0) {
+ if (bound != 0)
+ requested_len = bound;
+ else
+ requested_len = find_heap_max_free_elem(&socket_id, align);
+ }
+
+ if (socket_id == SOCKET_ID_ANY)
+ socket = malloc_get_numa_socket();
+ else
+ socket = socket_id;
+
+ /* allocate memory on heap */
+ void *mz_addr = malloc_heap_alloc(&mcfg->malloc_heaps[socket], NULL,
+ requested_len, flags, align, bound);
+
+ if ((mz_addr == NULL) && (socket_id == SOCKET_ID_ANY)) {
+ /* try other heaps */
+ for (i = 0; i < RTE_MAX_NUMA_NODES; i++) {
+ if (socket == i)
+ continue;
+
+ mz_addr = malloc_heap_alloc(&mcfg->malloc_heaps[i],
+ NULL, requested_len, flags, align, bound);
+ if (mz_addr != NULL)
+ break;
+ }
+ }
+
+ if (mz_addr == NULL) {
+ rte_errno = ENOMEM;
+ return NULL;
+ }
+
+ const struct malloc_elem *elem = malloc_elem_from_data(mz_addr);
+
+ /* fill the zone in config */
+ struct rte_memzone *mz = get_next_free_memzone();
+
+ if (mz == NULL) {
+ RTE_LOG(ERR, EAL, "%s(): Cannot find free memzone but there is room "
+ "in config!\n", __func__);
+ rte_errno = ENOSPC;
+ return NULL;
+ }
+
+ mcfg->memzone_cnt++;
+ snprintf(mz->name, sizeof(mz->name), "%s", name);
+ mz->phys_addr = rte_malloc_virt2phy(mz_addr);
+ mz->addr = mz_addr;
+ mz->len = (requested_len == 0 ? elem->size : requested_len);
+ mz->hugepage_sz = elem->ms->hugepage_sz;
+ mz->socket_id = elem->ms->socket_id;
+ mz->flags = 0;
+ mz->memseg_id = elem->ms - rte_eal_get_configuration()->mem_config->memseg;
+
+ return mz;
+}
+
+static const struct rte_memzone *
+rte_memzone_reserve_thread_safe(const char *name, size_t len,
+ int socket_id, unsigned flags, unsigned align,
+ unsigned bound)
+{
+ struct rte_mem_config *mcfg;
+ const struct rte_memzone *mz = NULL;
+
+ /* get pointer to global configuration */
+ mcfg = rte_eal_get_configuration()->mem_config;
+
+ rte_rwlock_write_lock(&mcfg->mlock);
+
+ mz = memzone_reserve_aligned_thread_unsafe(
+ name, len, socket_id, flags, align, bound);
+
+ rte_rwlock_write_unlock(&mcfg->mlock);
+
+ return mz;
+}
+
+/*
+ * Return a pointer to a correctly filled memzone descriptor (with a
+ * specified alignment and boundary). If the allocation cannot be done,
+ * return NULL.
+ */
+const struct rte_memzone *
+rte_memzone_reserve_bounded(const char *name, size_t len, int socket_id,
+ unsigned flags, unsigned align, unsigned bound)
+{
+ return rte_memzone_reserve_thread_safe(name, len, socket_id, flags,
+ align, bound);
+}
+
+/*
+ * Return a pointer to a correctly filled memzone descriptor (with a
+ * specified alignment). If the allocation cannot be done, return NULL.
+ */
+const struct rte_memzone *
+rte_memzone_reserve_aligned(const char *name, size_t len, int socket_id,
+ unsigned flags, unsigned align)
+{
+ return rte_memzone_reserve_thread_safe(name, len, socket_id, flags,
+ align, 0);
+}
+
+/*
+ * Return a pointer to a correctly filled memzone descriptor. If the
+ * allocation cannot be done, return NULL.
+ */
+const struct rte_memzone *
+rte_memzone_reserve(const char *name, size_t len, int socket_id,
+ unsigned flags)
+{
+ return rte_memzone_reserve_thread_safe(name, len, socket_id,
+ flags, RTE_CACHE_LINE_SIZE, 0);
+}
+
+int
+rte_memzone_free(const struct rte_memzone *mz)
+{
+ struct rte_mem_config *mcfg;
+ int ret = 0;
+ void *addr;
+ unsigned idx;
+
+ if (mz == NULL)
+ return -EINVAL;
+
+ mcfg = rte_eal_get_configuration()->mem_config;
+
+ rte_rwlock_write_lock(&mcfg->mlock);
+
+ idx = ((uintptr_t)mz - (uintptr_t)mcfg->memzone);
+ idx = idx / sizeof(struct rte_memzone);
+
+ addr = mcfg->memzone[idx].addr;
+#ifdef RTE_LIBRTE_IVSHMEM
+ /*
+ * If ioremap_addr is set, it's an IVSHMEM memzone and we cannot
+ * free it.
+ */
+ if (mcfg->memzone[idx].ioremap_addr != 0)
+ ret = -EINVAL;
+#endif
+ if (addr == NULL)
+ ret = -EINVAL;
+ else if (mcfg->memzone_cnt == 0) {
+ rte_panic("%s(): memzone address not NULL but memzone_cnt is 0!\n",
+ __func__);
+ } else {
+ memset(&mcfg->memzone[idx], 0, sizeof(mcfg->memzone[idx]));
+ mcfg->memzone_cnt--;
+ }
+
+ rte_rwlock_write_unlock(&mcfg->mlock);
+
+ rte_free(addr);
+
+ return ret;
+}
+
+/*
+ * Lookup for the memzone identified by the given name
+ */
+const struct rte_memzone *
+rte_memzone_lookup(const char *name)
+{
+ struct rte_mem_config *mcfg;
+ const struct rte_memzone *memzone = NULL;
+
+ mcfg = rte_eal_get_configuration()->mem_config;
+
+ rte_rwlock_read_lock(&mcfg->mlock);
+
+ memzone = memzone_lookup_thread_unsafe(name);
+
+ rte_rwlock_read_unlock(&mcfg->mlock);
+
+ return memzone;
+}
+
+/* Dump all reserved memory zones on console */
+void
+rte_memzone_dump(FILE *f)
+{
+ struct rte_mem_config *mcfg;
+ unsigned i = 0;
+
+ /* get pointer to global configuration */
+ mcfg = rte_eal_get_configuration()->mem_config;
+
+ rte_rwlock_read_lock(&mcfg->mlock);
+ /* dump all zones */
+ for (i=0; i<RTE_MAX_MEMZONE; i++) {
+ if (mcfg->memzone[i].addr == NULL)
+ break;
+ fprintf(f, "Zone %u: name:<%s>, phys:0x%"PRIx64", len:0x%zx"
+ ", virt:%p, socket_id:%"PRId32", flags:%"PRIx32"\n", i,
+ mcfg->memzone[i].name,
+ mcfg->memzone[i].phys_addr,
+ mcfg->memzone[i].len,
+ mcfg->memzone[i].addr,
+ mcfg->memzone[i].socket_id,
+ mcfg->memzone[i].flags);
+ }
+ rte_rwlock_read_unlock(&mcfg->mlock);
+}
+
+/*
+ * Init the memzone subsystem
+ */
+int
+rte_eal_memzone_init(void)
+{
+ struct rte_mem_config *mcfg;
+ const struct rte_memseg *memseg;
+
+ /* get pointer to global configuration */
+ mcfg = rte_eal_get_configuration()->mem_config;
+
+ /* secondary processes don't need to initialise anything */
+ if (rte_eal_process_type() == RTE_PROC_SECONDARY)
+ return 0;
+
+ memseg = rte_eal_get_physmem_layout();
+ if (memseg == NULL) {
+ RTE_LOG(ERR, EAL, "%s(): Cannot get physical layout\n", __func__);
+ return -1;
+ }
+
+ rte_rwlock_write_lock(&mcfg->mlock);
+
+ /* delete all zones */
+ mcfg->memzone_cnt = 0;
+ memset(mcfg->memzone, 0, sizeof(mcfg->memzone));
+
+ rte_rwlock_write_unlock(&mcfg->mlock);
+
+ return rte_eal_malloc_heap_init();
+}
+
+/* Walk all reserved memory zones */
+void rte_memzone_walk(void (*func)(const struct rte_memzone *, void *),
+ void *arg)
+{
+ struct rte_mem_config *mcfg;
+ unsigned i;
+
+ mcfg = rte_eal_get_configuration()->mem_config;
+
+ rte_rwlock_read_lock(&mcfg->mlock);
+ for (i=0; i<RTE_MAX_MEMZONE; i++) {
+ if (mcfg->memzone[i].addr != NULL)
+ (*func)(&mcfg->memzone[i], arg);
+ }
+ rte_rwlock_read_unlock(&mcfg->mlock);
+}
diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
new file mode 100644
index 00000000..2b418d52
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -0,0 +1,1022 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2014 6WIND S.A.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <syslog.h>
+#include <ctype.h>
+#include <limits.h>
+#include <errno.h>
+#include <getopt.h>
+#include <dlfcn.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <dirent.h>
+
+#include <rte_eal.h>
+#include <rte_log.h>
+#include <rte_lcore.h>
+#include <rte_version.h>
+#include <rte_devargs.h>
+#include <rte_memcpy.h>
+
+#include "eal_internal_cfg.h"
+#include "eal_options.h"
+#include "eal_filesystem.h"
+
+#define BITS_PER_HEX 4
+
+const char
+eal_short_options[] =
+ "b:" /* pci-blacklist */
+ "c:" /* coremask */
+ "d:" /* driver */
+ "h" /* help */
+ "l:" /* corelist */
+ "m:" /* memory size */
+ "n:" /* memory channels */
+ "r:" /* memory ranks */
+ "v" /* version */
+ "w:" /* pci-whitelist */
+ ;
+
+const struct option
+eal_long_options[] = {
+ {OPT_BASE_VIRTADDR, 1, NULL, OPT_BASE_VIRTADDR_NUM },
+ {OPT_CREATE_UIO_DEV, 0, NULL, OPT_CREATE_UIO_DEV_NUM },
+ {OPT_FILE_PREFIX, 1, NULL, OPT_FILE_PREFIX_NUM },
+ {OPT_HELP, 0, NULL, OPT_HELP_NUM },
+ {OPT_HUGE_DIR, 1, NULL, OPT_HUGE_DIR_NUM },
+ {OPT_HUGE_UNLINK, 0, NULL, OPT_HUGE_UNLINK_NUM },
+ {OPT_LCORES, 1, NULL, OPT_LCORES_NUM },
+ {OPT_LOG_LEVEL, 1, NULL, OPT_LOG_LEVEL_NUM },
+ {OPT_MASTER_LCORE, 1, NULL, OPT_MASTER_LCORE_NUM },
+ {OPT_NO_HPET, 0, NULL, OPT_NO_HPET_NUM },
+ {OPT_NO_HUGE, 0, NULL, OPT_NO_HUGE_NUM },
+ {OPT_NO_PCI, 0, NULL, OPT_NO_PCI_NUM },
+ {OPT_NO_SHCONF, 0, NULL, OPT_NO_SHCONF_NUM },
+ {OPT_PCI_BLACKLIST, 1, NULL, OPT_PCI_BLACKLIST_NUM },
+ {OPT_PCI_WHITELIST, 1, NULL, OPT_PCI_WHITELIST_NUM },
+ {OPT_PROC_TYPE, 1, NULL, OPT_PROC_TYPE_NUM },
+ {OPT_SOCKET_MEM, 1, NULL, OPT_SOCKET_MEM_NUM },
+ {OPT_SYSLOG, 1, NULL, OPT_SYSLOG_NUM },
+ {OPT_VDEV, 1, NULL, OPT_VDEV_NUM },
+ {OPT_VFIO_INTR, 1, NULL, OPT_VFIO_INTR_NUM },
+ {OPT_VMWARE_TSC_MAP, 0, NULL, OPT_VMWARE_TSC_MAP_NUM },
+ {OPT_XEN_DOM0, 0, NULL, OPT_XEN_DOM0_NUM },
+ {0, 0, NULL, 0 }
+};
+
+TAILQ_HEAD(shared_driver_list, shared_driver);
+
+/* Definition for shared object drivers. */
+struct shared_driver {
+ TAILQ_ENTRY(shared_driver) next;
+
+ char name[PATH_MAX];
+ void* lib_handle;
+};
+
+/* List of external loadable drivers */
+static struct shared_driver_list solib_list =
+TAILQ_HEAD_INITIALIZER(solib_list);
+
+/* Default path of external loadable drivers */
+static const char *default_solib_dir = RTE_EAL_PMD_PATH;
+
+static int master_lcore_parsed;
+static int mem_parsed;
+
+void
+eal_reset_internal_config(struct internal_config *internal_cfg)
+{
+ int i;
+
+ internal_cfg->memory = 0;
+ internal_cfg->force_nrank = 0;
+ internal_cfg->force_nchannel = 0;
+ internal_cfg->hugefile_prefix = HUGEFILE_PREFIX_DEFAULT;
+ internal_cfg->hugepage_dir = NULL;
+ internal_cfg->force_sockets = 0;
+ /* zero out the NUMA config */
+ for (i = 0; i < RTE_MAX_NUMA_NODES; i++)
+ internal_cfg->socket_mem[i] = 0;
+ /* zero out hugedir descriptors */
+ for (i = 0; i < MAX_HUGEPAGE_SIZES; i++)
+ internal_cfg->hugepage_info[i].lock_descriptor = -1;
+ internal_cfg->base_virtaddr = 0;
+
+ internal_cfg->syslog_facility = LOG_DAEMON;
+ /* default value from build option */
+ internal_cfg->log_level = RTE_LOG_LEVEL;
+
+ internal_cfg->xen_dom0_support = 0;
+
+ /* if set to NONE, interrupt mode is determined automatically */
+ internal_cfg->vfio_intr_mode = RTE_INTR_MODE_NONE;
+
+#ifdef RTE_LIBEAL_USE_HPET
+ internal_cfg->no_hpet = 0;
+#else
+ internal_cfg->no_hpet = 1;
+#endif
+ internal_cfg->vmware_tsc_map = 0;
+ internal_cfg->create_uio_dev = 0;
+}
+
+static int
+eal_plugin_add(const char *path)
+{
+ struct shared_driver *solib;
+
+ solib = malloc(sizeof(*solib));
+ if (solib == NULL) {
+ RTE_LOG(ERR, EAL, "malloc(solib) failed\n");
+ return -1;
+ }
+ memset(solib, 0, sizeof(*solib));
+ strncpy(solib->name, path, PATH_MAX-1);
+ solib->name[PATH_MAX-1] = 0;
+ TAILQ_INSERT_TAIL(&solib_list, solib, next);
+
+ return 0;
+}
+
+static int
+eal_plugindir_init(const char *path)
+{
+ DIR *d = NULL;
+ struct dirent *dent = NULL;
+ char sopath[PATH_MAX];
+
+ if (path == NULL || *path == '\0')
+ return 0;
+
+ d = opendir(path);
+ if (d == NULL) {
+ RTE_LOG(ERR, EAL, "failed to open directory %s: %s\n",
+ path, strerror(errno));
+ return -1;
+ }
+
+ while ((dent = readdir(d)) != NULL) {
+ struct stat sb;
+
+ snprintf(sopath, PATH_MAX-1, "%s/%s", path, dent->d_name);
+ sopath[PATH_MAX-1] = 0;
+
+ if (!(stat(sopath, &sb) == 0 && S_ISREG(sb.st_mode)))
+ continue;
+
+ if (eal_plugin_add(sopath) == -1)
+ break;
+ }
+
+ closedir(d);
+ /* XXX this ignores failures from readdir() itself */
+ return (dent == NULL) ? 0 : -1;
+}
+
+int
+eal_plugins_init(void)
+{
+ struct shared_driver *solib = NULL;
+
+ if (*default_solib_dir != '\0')
+ eal_plugin_add(default_solib_dir);
+
+ TAILQ_FOREACH(solib, &solib_list, next) {
+ struct stat sb;
+
+ if (stat(solib->name, &sb) == 0 && S_ISDIR(sb.st_mode)) {
+ if (eal_plugindir_init(solib->name) == -1) {
+ RTE_LOG(ERR, EAL,
+ "Cannot init plugin directory %s\n",
+ solib->name);
+ return -1;
+ }
+ } else {
+ RTE_LOG(DEBUG, EAL, "open shared lib %s\n",
+ solib->name);
+ solib->lib_handle = dlopen(solib->name, RTLD_NOW);
+ if (solib->lib_handle == NULL) {
+ RTE_LOG(ERR, EAL, "%s\n", dlerror());
+ return -1;
+ }
+ }
+
+ }
+ return 0;
+}
+
+/*
+ * Parse the coremask given as argument (hexadecimal string) and fill
+ * the global configuration (core role and core count) with the parsed
+ * value.
+ */
+static int xdigit2val(unsigned char c)
+{
+ int val;
+
+ if (isdigit(c))
+ val = c - '0';
+ else if (isupper(c))
+ val = c - 'A' + 10;
+ else
+ val = c - 'a' + 10;
+ return val;
+}
+
+static int
+eal_parse_coremask(const char *coremask)
+{
+ struct rte_config *cfg = rte_eal_get_configuration();
+ int i, j, idx = 0;
+ unsigned count = 0;
+ char c;
+ int val;
+
+ if (coremask == NULL)
+ return -1;
+ /* Remove all blank characters ahead and after .
+ * Remove 0x/0X if exists.
+ */
+ while (isblank(*coremask))
+ coremask++;
+ if (coremask[0] == '0' && ((coremask[1] == 'x')
+ || (coremask[1] == 'X')))
+ coremask += 2;
+ i = strlen(coremask);
+ while ((i > 0) && isblank(coremask[i - 1]))
+ i--;
+ if (i == 0)
+ return -1;
+
+ for (i = i - 1; i >= 0 && idx < RTE_MAX_LCORE; i--) {
+ c = coremask[i];
+ if (isxdigit(c) == 0) {
+ /* invalid characters */
+ return -1;
+ }
+ val = xdigit2val(c);
+ for (j = 0; j < BITS_PER_HEX && idx < RTE_MAX_LCORE; j++, idx++)
+ {
+ if ((1 << j) & val) {
+ if (!lcore_config[idx].detected) {
+ RTE_LOG(ERR, EAL, "lcore %u "
+ "unavailable\n", idx);
+ return -1;
+ }
+ cfg->lcore_role[idx] = ROLE_RTE;
+ lcore_config[idx].core_index = count;
+ count++;
+ } else {
+ cfg->lcore_role[idx] = ROLE_OFF;
+ lcore_config[idx].core_index = -1;
+ }
+ }
+ }
+ for (; i >= 0; i--)
+ if (coremask[i] != '0')
+ return -1;
+ for (; idx < RTE_MAX_LCORE; idx++) {
+ cfg->lcore_role[idx] = ROLE_OFF;
+ lcore_config[idx].core_index = -1;
+ }
+ if (count == 0)
+ return -1;
+ /* Update the count of enabled logical cores of the EAL configuration */
+ cfg->lcore_count = count;
+ return 0;
+}
+
+static int
+eal_parse_corelist(const char *corelist)
+{
+ struct rte_config *cfg = rte_eal_get_configuration();
+ int i, idx = 0;
+ unsigned count = 0;
+ char *end = NULL;
+ int min, max;
+
+ if (corelist == NULL)
+ return -1;
+
+ /* Remove all blank characters ahead and after */
+ while (isblank(*corelist))
+ corelist++;
+ i = strlen(corelist);
+ while ((i > 0) && isblank(corelist[i - 1]))
+ i--;
+
+ /* Reset config */
+ for (idx = 0; idx < RTE_MAX_LCORE; idx++) {
+ cfg->lcore_role[idx] = ROLE_OFF;
+ lcore_config[idx].core_index = -1;
+ }
+
+ /* Get list of cores */
+ min = RTE_MAX_LCORE;
+ do {
+ while (isblank(*corelist))
+ corelist++;
+ if (*corelist == '\0')
+ return -1;
+ errno = 0;
+ idx = strtoul(corelist, &end, 10);
+ if (errno || end == NULL)
+ return -1;
+ while (isblank(*end))
+ end++;
+ if (*end == '-') {
+ min = idx;
+ } else if ((*end == ',') || (*end == '\0')) {
+ max = idx;
+ if (min == RTE_MAX_LCORE)
+ min = idx;
+ for (idx = min; idx <= max; idx++) {
+ if (cfg->lcore_role[idx] != ROLE_RTE) {
+ cfg->lcore_role[idx] = ROLE_RTE;
+ lcore_config[idx].core_index = count;
+ count++;
+ }
+ }
+ min = RTE_MAX_LCORE;
+ } else
+ return -1;
+ corelist = end + 1;
+ } while (*end != '\0');
+
+ if (count == 0)
+ return -1;
+
+ /* Update the count of enabled logical cores of the EAL configuration */
+ cfg->lcore_count = count;
+
+ return 0;
+}
+
+/* Changes the lcore id of the master thread */
+static int
+eal_parse_master_lcore(const char *arg)
+{
+ char *parsing_end;
+ struct rte_config *cfg = rte_eal_get_configuration();
+
+ errno = 0;
+ cfg->master_lcore = (uint32_t) strtol(arg, &parsing_end, 0);
+ if (errno || parsing_end[0] != 0)
+ return -1;
+ if (cfg->master_lcore >= RTE_MAX_LCORE)
+ return -1;
+ master_lcore_parsed = 1;
+ return 0;
+}
+
+/*
+ * Parse elem, the elem could be single number/range or '(' ')' group
+ * 1) A single number elem, it's just a simple digit. e.g. 9
+ * 2) A single range elem, two digits with a '-' between. e.g. 2-6
+ * 3) A group elem, combines multiple 1) or 2) with '( )'. e.g (0,2-4,6)
+ * Within group elem, '-' used for a range separator;
+ * ',' used for a single number.
+ */
+static int
+eal_parse_set(const char *input, uint16_t set[], unsigned num)
+{
+ unsigned idx;
+ const char *str = input;
+ char *end = NULL;
+ unsigned min, max;
+
+ memset(set, 0, num * sizeof(uint16_t));
+
+ while (isblank(*str))
+ str++;
+
+ /* only digit or left bracket is qualify for start point */
+ if ((!isdigit(*str) && *str != '(') || *str == '\0')
+ return -1;
+
+ /* process single number or single range of number */
+ if (*str != '(') {
+ errno = 0;
+ idx = strtoul(str, &end, 10);
+ if (errno || end == NULL || idx >= num)
+ return -1;
+ else {
+ while (isblank(*end))
+ end++;
+
+ min = idx;
+ max = idx;
+ if (*end == '-') {
+ /* process single <number>-<number> */
+ end++;
+ while (isblank(*end))
+ end++;
+ if (!isdigit(*end))
+ return -1;
+
+ errno = 0;
+ idx = strtoul(end, &end, 10);
+ if (errno || end == NULL || idx >= num)
+ return -1;
+ max = idx;
+ while (isblank(*end))
+ end++;
+ if (*end != ',' && *end != '\0')
+ return -1;
+ }
+
+ if (*end != ',' && *end != '\0' &&
+ *end != '@')
+ return -1;
+
+ for (idx = RTE_MIN(min, max);
+ idx <= RTE_MAX(min, max); idx++)
+ set[idx] = 1;
+
+ return end - input;
+ }
+ }
+
+ /* process set within bracket */
+ str++;
+ while (isblank(*str))
+ str++;
+ if (*str == '\0')
+ return -1;
+
+ min = RTE_MAX_LCORE;
+ do {
+
+ /* go ahead to the first digit */
+ while (isblank(*str))
+ str++;
+ if (!isdigit(*str))
+ return -1;
+
+ /* get the digit value */
+ errno = 0;
+ idx = strtoul(str, &end, 10);
+ if (errno || end == NULL || idx >= num)
+ return -1;
+
+ /* go ahead to separator '-',',' and ')' */
+ while (isblank(*end))
+ end++;
+ if (*end == '-') {
+ if (min == RTE_MAX_LCORE)
+ min = idx;
+ else /* avoid continuous '-' */
+ return -1;
+ } else if ((*end == ',') || (*end == ')')) {
+ max = idx;
+ if (min == RTE_MAX_LCORE)
+ min = idx;
+ for (idx = RTE_MIN(min, max);
+ idx <= RTE_MAX(min, max); idx++)
+ set[idx] = 1;
+
+ min = RTE_MAX_LCORE;
+ } else
+ return -1;
+
+ str = end + 1;
+ } while (*end != '\0' && *end != ')');
+
+ return str - input;
+}
+
+/* convert from set array to cpuset bitmap */
+static int
+convert_to_cpuset(rte_cpuset_t *cpusetp,
+ uint16_t *set, unsigned num)
+{
+ unsigned idx;
+
+ CPU_ZERO(cpusetp);
+
+ for (idx = 0; idx < num; idx++) {
+ if (!set[idx])
+ continue;
+
+ if (!lcore_config[idx].detected) {
+ RTE_LOG(ERR, EAL, "core %u "
+ "unavailable\n", idx);
+ return -1;
+ }
+
+ CPU_SET(idx, cpusetp);
+ }
+
+ return 0;
+}
+
+/*
+ * The format pattern: --lcores='<lcores[@cpus]>[<,lcores[@cpus]>...]'
+ * lcores, cpus could be a single digit/range or a group.
+ * '(' and ')' are necessary if it's a group.
+ * If not supply '@cpus', the value of cpus uses the same as lcores.
+ * e.g. '1,2@(5-7),(3-5)@(0,2),(0,6),7-8' means start 9 EAL thread as below
+ * lcore 0 runs on cpuset 0x41 (cpu 0,6)
+ * lcore 1 runs on cpuset 0x2 (cpu 1)
+ * lcore 2 runs on cpuset 0xe0 (cpu 5,6,7)
+ * lcore 3,4,5 runs on cpuset 0x5 (cpu 0,2)
+ * lcore 6 runs on cpuset 0x41 (cpu 0,6)
+ * lcore 7 runs on cpuset 0x80 (cpu 7)
+ * lcore 8 runs on cpuset 0x100 (cpu 8)
+ */
+static int
+eal_parse_lcores(const char *lcores)
+{
+ struct rte_config *cfg = rte_eal_get_configuration();
+ static uint16_t set[RTE_MAX_LCORE];
+ unsigned idx = 0;
+ int i;
+ unsigned count = 0;
+ const char *lcore_start = NULL;
+ const char *end = NULL;
+ int offset;
+ rte_cpuset_t cpuset;
+ int lflags = 0;
+ int ret = -1;
+
+ if (lcores == NULL)
+ return -1;
+
+ /* Remove all blank characters ahead and after */
+ while (isblank(*lcores))
+ lcores++;
+ i = strlen(lcores);
+ while ((i > 0) && isblank(lcores[i - 1]))
+ i--;
+
+ CPU_ZERO(&cpuset);
+
+ /* Reset lcore config */
+ for (idx = 0; idx < RTE_MAX_LCORE; idx++) {
+ cfg->lcore_role[idx] = ROLE_OFF;
+ lcore_config[idx].core_index = -1;
+ CPU_ZERO(&lcore_config[idx].cpuset);
+ }
+
+ /* Get list of cores */
+ do {
+ while (isblank(*lcores))
+ lcores++;
+ if (*lcores == '\0')
+ goto err;
+
+ /* record lcore_set start point */
+ lcore_start = lcores;
+
+ /* go across a complete bracket */
+ if (*lcore_start == '(') {
+ lcores += strcspn(lcores, ")");
+ if (*lcores++ == '\0')
+ goto err;
+ }
+
+ /* scan the separator '@', ','(next) or '\0'(finish) */
+ lcores += strcspn(lcores, "@,");
+
+ if (*lcores == '@') {
+ /* explicit assign cpu_set */
+ offset = eal_parse_set(lcores + 1, set, RTE_DIM(set));
+ if (offset < 0)
+ goto err;
+
+ /* prepare cpu_set and update the end cursor */
+ if (0 > convert_to_cpuset(&cpuset,
+ set, RTE_DIM(set)))
+ goto err;
+ end = lcores + 1 + offset;
+ } else { /* ',' or '\0' */
+ /* haven't given cpu_set, current loop done */
+ end = lcores;
+
+ /* go back to check <number>-<number> */
+ offset = strcspn(lcore_start, "(-");
+ if (offset < (end - lcore_start) &&
+ *(lcore_start + offset) != '(')
+ lflags = 1;
+ }
+
+ if (*end != ',' && *end != '\0')
+ goto err;
+
+ /* parse lcore_set from start point */
+ if (0 > eal_parse_set(lcore_start, set, RTE_DIM(set)))
+ goto err;
+
+ /* without '@', by default using lcore_set as cpu_set */
+ if (*lcores != '@' &&
+ 0 > convert_to_cpuset(&cpuset, set, RTE_DIM(set)))
+ goto err;
+
+ /* start to update lcore_set */
+ for (idx = 0; idx < RTE_MAX_LCORE; idx++) {
+ if (!set[idx])
+ continue;
+
+ if (cfg->lcore_role[idx] != ROLE_RTE) {
+ lcore_config[idx].core_index = count;
+ cfg->lcore_role[idx] = ROLE_RTE;
+ count++;
+ }
+
+ if (lflags) {
+ CPU_ZERO(&cpuset);
+ CPU_SET(idx, &cpuset);
+ }
+ rte_memcpy(&lcore_config[idx].cpuset, &cpuset,
+ sizeof(rte_cpuset_t));
+ }
+
+ lcores = end + 1;
+ } while (*end != '\0');
+
+ if (count == 0)
+ goto err;
+
+ cfg->lcore_count = count;
+ ret = 0;
+
+err:
+
+ return ret;
+}
+
+static int
+eal_parse_syslog(const char *facility, struct internal_config *conf)
+{
+ int i;
+ static struct {
+ const char *name;
+ int value;
+ } map[] = {
+ { "auth", LOG_AUTH },
+ { "cron", LOG_CRON },
+ { "daemon", LOG_DAEMON },
+ { "ftp", LOG_FTP },
+ { "kern", LOG_KERN },
+ { "lpr", LOG_LPR },
+ { "mail", LOG_MAIL },
+ { "news", LOG_NEWS },
+ { "syslog", LOG_SYSLOG },
+ { "user", LOG_USER },
+ { "uucp", LOG_UUCP },
+ { "local0", LOG_LOCAL0 },
+ { "local1", LOG_LOCAL1 },
+ { "local2", LOG_LOCAL2 },
+ { "local3", LOG_LOCAL3 },
+ { "local4", LOG_LOCAL4 },
+ { "local5", LOG_LOCAL5 },
+ { "local6", LOG_LOCAL6 },
+ { "local7", LOG_LOCAL7 },
+ { NULL, 0 }
+ };
+
+ for (i = 0; map[i].name; i++) {
+ if (!strcmp(facility, map[i].name)) {
+ conf->syslog_facility = map[i].value;
+ return 0;
+ }
+ }
+ return -1;
+}
+
+static int
+eal_parse_log_level(const char *level, uint32_t *log_level)
+{
+ char *end;
+ unsigned long tmp;
+
+ errno = 0;
+ tmp = strtoul(level, &end, 0);
+
+ /* check for errors */
+ if ((errno != 0) || (level[0] == '\0') ||
+ end == NULL || (*end != '\0'))
+ return -1;
+
+ /* log_level is a uint32_t */
+ if (tmp >= UINT32_MAX)
+ return -1;
+
+ *log_level = tmp;
+ return 0;
+}
+
+static enum rte_proc_type_t
+eal_parse_proc_type(const char *arg)
+{
+ if (strncasecmp(arg, "primary", sizeof("primary")) == 0)
+ return RTE_PROC_PRIMARY;
+ if (strncasecmp(arg, "secondary", sizeof("secondary")) == 0)
+ return RTE_PROC_SECONDARY;
+ if (strncasecmp(arg, "auto", sizeof("auto")) == 0)
+ return RTE_PROC_AUTO;
+
+ return RTE_PROC_INVALID;
+}
+
+int
+eal_parse_common_option(int opt, const char *optarg,
+ struct internal_config *conf)
+{
+ switch (opt) {
+ /* blacklist */
+ case 'b':
+ if (rte_eal_devargs_add(RTE_DEVTYPE_BLACKLISTED_PCI,
+ optarg) < 0) {
+ return -1;
+ }
+ break;
+ /* whitelist */
+ case 'w':
+ if (rte_eal_devargs_add(RTE_DEVTYPE_WHITELISTED_PCI,
+ optarg) < 0) {
+ return -1;
+ }
+ break;
+ /* coremask */
+ case 'c':
+ if (eal_parse_coremask(optarg) < 0) {
+ RTE_LOG(ERR, EAL, "invalid coremask\n");
+ return -1;
+ }
+ break;
+ /* corelist */
+ case 'l':
+ if (eal_parse_corelist(optarg) < 0) {
+ RTE_LOG(ERR, EAL, "invalid core list\n");
+ return -1;
+ }
+ break;
+ /* size of memory */
+ case 'm':
+ conf->memory = atoi(optarg);
+ conf->memory *= 1024ULL;
+ conf->memory *= 1024ULL;
+ mem_parsed = 1;
+ break;
+ /* force number of channels */
+ case 'n':
+ conf->force_nchannel = atoi(optarg);
+ if (conf->force_nchannel == 0) {
+ RTE_LOG(ERR, EAL, "invalid channel number\n");
+ return -1;
+ }
+ break;
+ /* force number of ranks */
+ case 'r':
+ conf->force_nrank = atoi(optarg);
+ if (conf->force_nrank == 0 ||
+ conf->force_nrank > 16) {
+ RTE_LOG(ERR, EAL, "invalid rank number\n");
+ return -1;
+ }
+ break;
+ /* force loading of external driver */
+ case 'd':
+ if (eal_plugin_add(optarg) == -1)
+ return -1;
+ break;
+ case 'v':
+ /* since message is explicitly requested by user, we
+ * write message at highest log level so it can always
+ * be seen
+ * even if info or warning messages are disabled */
+ RTE_LOG(CRIT, EAL, "RTE Version: '%s'\n", rte_version());
+ break;
+
+ /* long options */
+ case OPT_HUGE_UNLINK_NUM:
+ conf->hugepage_unlink = 1;
+ break;
+
+ case OPT_NO_HUGE_NUM:
+ conf->no_hugetlbfs = 1;
+ break;
+
+ case OPT_NO_PCI_NUM:
+ conf->no_pci = 1;
+ break;
+
+ case OPT_NO_HPET_NUM:
+ conf->no_hpet = 1;
+ break;
+
+ case OPT_VMWARE_TSC_MAP_NUM:
+ conf->vmware_tsc_map = 1;
+ break;
+
+ case OPT_NO_SHCONF_NUM:
+ conf->no_shconf = 1;
+ break;
+
+ case OPT_PROC_TYPE_NUM:
+ conf->process_type = eal_parse_proc_type(optarg);
+ break;
+
+ case OPT_MASTER_LCORE_NUM:
+ if (eal_parse_master_lcore(optarg) < 0) {
+ RTE_LOG(ERR, EAL, "invalid parameter for --"
+ OPT_MASTER_LCORE "\n");
+ return -1;
+ }
+ break;
+
+ case OPT_VDEV_NUM:
+ if (rte_eal_devargs_add(RTE_DEVTYPE_VIRTUAL,
+ optarg) < 0) {
+ return -1;
+ }
+ break;
+
+ case OPT_SYSLOG_NUM:
+ if (eal_parse_syslog(optarg, conf) < 0) {
+ RTE_LOG(ERR, EAL, "invalid parameters for --"
+ OPT_SYSLOG "\n");
+ return -1;
+ }
+ break;
+
+ case OPT_LOG_LEVEL_NUM: {
+ uint32_t log;
+
+ if (eal_parse_log_level(optarg, &log) < 0) {
+ RTE_LOG(ERR, EAL,
+ "invalid parameters for --"
+ OPT_LOG_LEVEL "\n");
+ return -1;
+ }
+ conf->log_level = log;
+ break;
+ }
+ case OPT_LCORES_NUM:
+ if (eal_parse_lcores(optarg) < 0) {
+ RTE_LOG(ERR, EAL, "invalid parameter for --"
+ OPT_LCORES "\n");
+ return -1;
+ }
+ break;
+
+ /* don't know what to do, leave this to caller */
+ default:
+ return 1;
+
+ }
+
+ return 0;
+}
+
+int
+eal_adjust_config(struct internal_config *internal_cfg)
+{
+ int i;
+ struct rte_config *cfg = rte_eal_get_configuration();
+
+ if (internal_config.process_type == RTE_PROC_AUTO)
+ internal_config.process_type = eal_proc_type_detect();
+
+ /* default master lcore is the first one */
+ if (!master_lcore_parsed)
+ cfg->master_lcore = rte_get_next_lcore(-1, 0, 0);
+
+ /* if no memory amounts were requested, this will result in 0 and
+ * will be overridden later, right after eal_hugepage_info_init() */
+ for (i = 0; i < RTE_MAX_NUMA_NODES; i++)
+ internal_cfg->memory += internal_cfg->socket_mem[i];
+
+ return 0;
+}
+
+int
+eal_check_common_options(struct internal_config *internal_cfg)
+{
+ struct rte_config *cfg = rte_eal_get_configuration();
+
+ if (cfg->lcore_role[cfg->master_lcore] != ROLE_RTE) {
+ RTE_LOG(ERR, EAL, "Master lcore is not enabled for DPDK\n");
+ return -1;
+ }
+
+ if (internal_cfg->process_type == RTE_PROC_INVALID) {
+ RTE_LOG(ERR, EAL, "Invalid process type specified\n");
+ return -1;
+ }
+ if (index(internal_cfg->hugefile_prefix, '%') != NULL) {
+ RTE_LOG(ERR, EAL, "Invalid char, '%%', in --"OPT_FILE_PREFIX" "
+ "option\n");
+ return -1;
+ }
+ if (mem_parsed && internal_cfg->force_sockets == 1) {
+ RTE_LOG(ERR, EAL, "Options -m and --"OPT_SOCKET_MEM" cannot "
+ "be specified at the same time\n");
+ return -1;
+ }
+ if (internal_cfg->no_hugetlbfs && internal_cfg->force_sockets == 1) {
+ RTE_LOG(ERR, EAL, "Option --"OPT_SOCKET_MEM" cannot "
+ "be specified together with --"OPT_NO_HUGE"\n");
+ return -1;
+ }
+
+ if (internal_cfg->no_hugetlbfs && internal_cfg->hugepage_unlink) {
+ RTE_LOG(ERR, EAL, "Option --"OPT_HUGE_UNLINK" cannot "
+ "be specified together with --"OPT_NO_HUGE"\n");
+ return -1;
+ }
+
+ if (rte_eal_devargs_type_count(RTE_DEVTYPE_WHITELISTED_PCI) != 0 &&
+ rte_eal_devargs_type_count(RTE_DEVTYPE_BLACKLISTED_PCI) != 0) {
+ RTE_LOG(ERR, EAL, "Options blacklist (-b) and whitelist (-w) "
+ "cannot be used at the same time\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+void
+eal_common_usage(void)
+{
+ printf("[options]\n\n"
+ "EAL common options:\n"
+ " -c COREMASK Hexadecimal bitmask of cores to run on\n"
+ " -l CORELIST List of cores to run on\n"
+ " The argument format is <c1>[-c2][,c3[-c4],...]\n"
+ " where c1, c2, etc are core indexes between 0 and %d\n"
+ " --"OPT_LCORES" COREMAP Map lcore set to physical cpu set\n"
+ " The argument format is\n"
+ " '<lcores[@cpus]>[<,lcores[@cpus]>...]'\n"
+ " lcores and cpus list are grouped by '(' and ')'\n"
+ " Within the group, '-' is used for range separator,\n"
+ " ',' is used for single number separator.\n"
+ " '( )' can be omitted for single element group,\n"
+ " '@' can be omitted if cpus and lcores have the same value\n"
+ " --"OPT_MASTER_LCORE" ID Core ID that is used as master\n"
+ " -n CHANNELS Number of memory channels\n"
+ " -m MB Memory to allocate (see also --"OPT_SOCKET_MEM")\n"
+ " -r RANKS Force number of memory ranks (don't detect)\n"
+ " -b, --"OPT_PCI_BLACKLIST" Add a PCI device in black list.\n"
+ " Prevent EAL from using this PCI device. The argument\n"
+ " format is <domain:bus:devid.func>.\n"
+ " -w, --"OPT_PCI_WHITELIST" Add a PCI device in white list.\n"
+ " Only use the specified PCI devices. The argument format\n"
+ " is <[domain:]bus:devid.func>. This option can be present\n"
+ " several times (once per device).\n"
+ " [NOTE: PCI whitelist cannot be used with -b option]\n"
+ " --"OPT_VDEV" Add a virtual device.\n"
+ " The argument format is <driver><id>[,key=val,...]\n"
+ " (ex: --vdev=eth_pcap0,iface=eth2).\n"
+ " -d LIB.so|DIR Add a driver or driver directory\n"
+ " (can be used multiple times)\n"
+ " --"OPT_VMWARE_TSC_MAP" Use VMware TSC map instead of native RDTSC\n"
+ " --"OPT_PROC_TYPE" Type of this process (primary|secondary|auto)\n"
+ " --"OPT_SYSLOG" Set syslog facility\n"
+ " --"OPT_LOG_LEVEL" Set default log level\n"
+ " -v Display version information on startup\n"
+ " -h, --help This help\n"
+ "\nEAL options for DEBUG use only:\n"
+ " --"OPT_HUGE_UNLINK" Unlink hugepage files after init\n"
+ " --"OPT_NO_HUGE" Use malloc instead of hugetlbfs\n"
+ " --"OPT_NO_PCI" Disable PCI\n"
+ " --"OPT_NO_HPET" Disable HPET\n"
+ " --"OPT_NO_SHCONF" No shared config (mmap'd files)\n"
+ "\n", RTE_MAX_LCORE);
+}
diff --git a/lib/librte_eal/common/eal_common_pci.c b/lib/librte_eal/common/eal_common_pci.c
new file mode 100644
index 00000000..40f49229
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_pci.c
@@ -0,0 +1,457 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+/* BSD LICENSE
+ *
+ * Copyright 2013-2014 6WIND S.A.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+#include <inttypes.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/queue.h>
+#include <sys/mman.h>
+
+#include <rte_interrupts.h>
+#include <rte_log.h>
+#include <rte_pci.h>
+#include <rte_per_lcore.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_eal.h>
+#include <rte_string_fns.h>
+#include <rte_common.h>
+#include <rte_devargs.h>
+
+#include "eal_private.h"
+
+struct pci_driver_list pci_driver_list;
+struct pci_device_list pci_device_list;
+
+static struct rte_devargs *pci_devargs_lookup(struct rte_pci_device *dev)
+{
+ struct rte_devargs *devargs;
+
+ TAILQ_FOREACH(devargs, &devargs_list, next) {
+ if (devargs->type != RTE_DEVTYPE_BLACKLISTED_PCI &&
+ devargs->type != RTE_DEVTYPE_WHITELISTED_PCI)
+ continue;
+ if (!rte_eal_compare_pci_addr(&dev->addr, &devargs->pci.addr))
+ return devargs;
+ }
+ return NULL;
+}
+
+/* map a particular resource from a file */
+void *
+pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size,
+ int additional_flags)
+{
+ void *mapaddr;
+
+ /* Map the PCI memory resource of device */
+ mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE,
+ MAP_SHARED | additional_flags, fd, offset);
+ if (mapaddr == MAP_FAILED) {
+ RTE_LOG(ERR, EAL, "%s(): cannot mmap(%d, %p, 0x%lx, 0x%lx): %s (%p)\n",
+ __func__, fd, requested_addr,
+ (unsigned long)size, (unsigned long)offset,
+ strerror(errno), mapaddr);
+ } else
+ RTE_LOG(DEBUG, EAL, " PCI memory mapped at %p\n", mapaddr);
+
+ return mapaddr;
+}
+
+/* unmap a particular resource */
+void
+pci_unmap_resource(void *requested_addr, size_t size)
+{
+ if (requested_addr == NULL)
+ return;
+
+ /* Unmap the PCI memory resource of device */
+ if (munmap(requested_addr, size)) {
+ RTE_LOG(ERR, EAL, "%s(): cannot munmap(%p, 0x%lx): %s\n",
+ __func__, requested_addr, (unsigned long)size,
+ strerror(errno));
+ } else
+ RTE_LOG(DEBUG, EAL, " PCI memory unmapped at %p\n",
+ requested_addr);
+}
+
+/*
+ * If vendor/device ID match, call the devinit() function of the
+ * driver.
+ */
+static int
+rte_eal_pci_probe_one_driver(struct rte_pci_driver *dr, struct rte_pci_device *dev)
+{
+ int ret;
+ const struct rte_pci_id *id_table;
+
+ for (id_table = dr->id_table; id_table->vendor_id != 0; id_table++) {
+
+ /* check if device's identifiers match the driver's ones */
+ if (id_table->vendor_id != dev->id.vendor_id &&
+ id_table->vendor_id != PCI_ANY_ID)
+ continue;
+ if (id_table->device_id != dev->id.device_id &&
+ id_table->device_id != PCI_ANY_ID)
+ continue;
+ if (id_table->subsystem_vendor_id != dev->id.subsystem_vendor_id &&
+ id_table->subsystem_vendor_id != PCI_ANY_ID)
+ continue;
+ if (id_table->subsystem_device_id != dev->id.subsystem_device_id &&
+ id_table->subsystem_device_id != PCI_ANY_ID)
+ continue;
+
+ struct rte_pci_addr *loc = &dev->addr;
+
+ RTE_LOG(DEBUG, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n",
+ loc->domain, loc->bus, loc->devid, loc->function,
+ dev->numa_node);
+
+ RTE_LOG(DEBUG, EAL, " probe driver: %x:%x %s\n", dev->id.vendor_id,
+ dev->id.device_id, dr->name);
+
+ /* no initialization when blacklisted, return without error */
+ if (dev->devargs != NULL &&
+ dev->devargs->type == RTE_DEVTYPE_BLACKLISTED_PCI) {
+ RTE_LOG(DEBUG, EAL, " Device is blacklisted, not initializing\n");
+ return 1;
+ }
+
+ if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) {
+ /* map resources for devices that use igb_uio */
+ ret = rte_eal_pci_map_device(dev);
+ if (ret != 0)
+ return ret;
+ } else if (dr->drv_flags & RTE_PCI_DRV_FORCE_UNBIND &&
+ rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ /* unbind current driver */
+ if (pci_unbind_kernel_driver(dev) < 0)
+ return -1;
+ }
+
+ /* reference driver structure */
+ dev->driver = dr;
+
+ /* call the driver devinit() function */
+ return dr->devinit(dr, dev);
+ }
+ /* return positive value if driver doesn't support this device */
+ return 1;
+}
+
+/*
+ * If vendor/device ID match, call the devuninit() function of the
+ * driver.
+ */
+static int
+rte_eal_pci_detach_dev(struct rte_pci_driver *dr,
+ struct rte_pci_device *dev)
+{
+ const struct rte_pci_id *id_table;
+
+ if ((dr == NULL) || (dev == NULL))
+ return -EINVAL;
+
+ for (id_table = dr->id_table; id_table->vendor_id != 0; id_table++) {
+
+ /* check if device's identifiers match the driver's ones */
+ if (id_table->vendor_id != dev->id.vendor_id &&
+ id_table->vendor_id != PCI_ANY_ID)
+ continue;
+ if (id_table->device_id != dev->id.device_id &&
+ id_table->device_id != PCI_ANY_ID)
+ continue;
+ if (id_table->subsystem_vendor_id != dev->id.subsystem_vendor_id &&
+ id_table->subsystem_vendor_id != PCI_ANY_ID)
+ continue;
+ if (id_table->subsystem_device_id != dev->id.subsystem_device_id &&
+ id_table->subsystem_device_id != PCI_ANY_ID)
+ continue;
+
+ struct rte_pci_addr *loc = &dev->addr;
+
+ RTE_LOG(DEBUG, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n",
+ loc->domain, loc->bus, loc->devid,
+ loc->function, dev->numa_node);
+
+ RTE_LOG(DEBUG, EAL, " remove driver: %x:%x %s\n", dev->id.vendor_id,
+ dev->id.device_id, dr->name);
+
+ if (dr->devuninit && (dr->devuninit(dev) < 0))
+ return -1; /* negative value is an error */
+
+ /* clear driver structure */
+ dev->driver = NULL;
+
+ if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING)
+ /* unmap resources for devices that use igb_uio */
+ rte_eal_pci_unmap_device(dev);
+
+ return 0;
+ }
+
+ /* return positive value if driver doesn't support this device */
+ return 1;
+}
+
+/*
+ * If vendor/device ID match, call the devinit() function of all
+ * registered driver for the given device. Return -1 if initialization
+ * failed, return 1 if no driver is found for this device.
+ */
+static int
+pci_probe_all_drivers(struct rte_pci_device *dev)
+{
+ struct rte_pci_driver *dr = NULL;
+ int rc = 0;
+
+ if (dev == NULL)
+ return -1;
+
+ TAILQ_FOREACH(dr, &pci_driver_list, next) {
+ rc = rte_eal_pci_probe_one_driver(dr, dev);
+ if (rc < 0)
+ /* negative value is an error */
+ return -1;
+ if (rc > 0)
+ /* positive value means driver doesn't support it */
+ continue;
+ return 0;
+ }
+ return 1;
+}
+
+/*
+ * If vendor/device ID match, call the devuninit() function of all
+ * registered driver for the given device. Return -1 if initialization
+ * failed, return 1 if no driver is found for this device.
+ */
+static int
+pci_detach_all_drivers(struct rte_pci_device *dev)
+{
+ struct rte_pci_driver *dr = NULL;
+ int rc = 0;
+
+ if (dev == NULL)
+ return -1;
+
+ TAILQ_FOREACH(dr, &pci_driver_list, next) {
+ rc = rte_eal_pci_detach_dev(dr, dev);
+ if (rc < 0)
+ /* negative value is an error */
+ return -1;
+ if (rc > 0)
+ /* positive value means driver doesn't support it */
+ continue;
+ return 0;
+ }
+ return 1;
+}
+
+/*
+ * Find the pci device specified by pci address, then invoke probe function of
+ * the driver of the devive.
+ */
+int
+rte_eal_pci_probe_one(const struct rte_pci_addr *addr)
+{
+ struct rte_pci_device *dev = NULL;
+ int ret = 0;
+
+ if (addr == NULL)
+ return -1;
+
+ TAILQ_FOREACH(dev, &pci_device_list, next) {
+ if (rte_eal_compare_pci_addr(&dev->addr, addr))
+ continue;
+
+ ret = pci_probe_all_drivers(dev);
+ if (ret < 0)
+ goto err_return;
+ return 0;
+ }
+ return -1;
+
+err_return:
+ RTE_LOG(WARNING, EAL, "Requested device " PCI_PRI_FMT
+ " cannot be used\n", dev->addr.domain, dev->addr.bus,
+ dev->addr.devid, dev->addr.function);
+ return -1;
+}
+
+/*
+ * Detach device specified by its pci address.
+ */
+int
+rte_eal_pci_detach(const struct rte_pci_addr *addr)
+{
+ struct rte_pci_device *dev = NULL;
+ int ret = 0;
+
+ if (addr == NULL)
+ return -1;
+
+ TAILQ_FOREACH(dev, &pci_device_list, next) {
+ if (rte_eal_compare_pci_addr(&dev->addr, addr))
+ continue;
+
+ ret = pci_detach_all_drivers(dev);
+ if (ret < 0)
+ goto err_return;
+
+ TAILQ_REMOVE(&pci_device_list, dev, next);
+ return 0;
+ }
+ return -1;
+
+err_return:
+ RTE_LOG(WARNING, EAL, "Requested device " PCI_PRI_FMT
+ " cannot be used\n", dev->addr.domain, dev->addr.bus,
+ dev->addr.devid, dev->addr.function);
+ return -1;
+}
+
+/*
+ * Scan the content of the PCI bus, and call the devinit() function for
+ * all registered drivers that have a matching entry in its id_table
+ * for discovered devices.
+ */
+int
+rte_eal_pci_probe(void)
+{
+ struct rte_pci_device *dev = NULL;
+ struct rte_devargs *devargs;
+ int probe_all = 0;
+ int ret = 0;
+
+ if (rte_eal_devargs_type_count(RTE_DEVTYPE_WHITELISTED_PCI) == 0)
+ probe_all = 1;
+
+ TAILQ_FOREACH(dev, &pci_device_list, next) {
+
+ /* set devargs in PCI structure */
+ devargs = pci_devargs_lookup(dev);
+ if (devargs != NULL)
+ dev->devargs = devargs;
+
+ /* probe all or only whitelisted devices */
+ if (probe_all)
+ ret = pci_probe_all_drivers(dev);
+ else if (devargs != NULL &&
+ devargs->type == RTE_DEVTYPE_WHITELISTED_PCI)
+ ret = pci_probe_all_drivers(dev);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Requested device " PCI_PRI_FMT
+ " cannot be used\n", dev->addr.domain, dev->addr.bus,
+ dev->addr.devid, dev->addr.function);
+ }
+
+ return 0;
+}
+
+/* dump one device */
+static int
+pci_dump_one_device(FILE *f, struct rte_pci_device *dev)
+{
+ int i;
+
+ fprintf(f, PCI_PRI_FMT, dev->addr.domain, dev->addr.bus,
+ dev->addr.devid, dev->addr.function);
+ fprintf(f, " - vendor:%x device:%x\n", dev->id.vendor_id,
+ dev->id.device_id);
+
+ for (i = 0; i != sizeof(dev->mem_resource) /
+ sizeof(dev->mem_resource[0]); i++) {
+ fprintf(f, " %16.16"PRIx64" %16.16"PRIx64"\n",
+ dev->mem_resource[i].phys_addr,
+ dev->mem_resource[i].len);
+ }
+ return 0;
+}
+
+/* dump devices on the bus */
+void
+rte_eal_pci_dump(FILE *f)
+{
+ struct rte_pci_device *dev = NULL;
+
+ TAILQ_FOREACH(dev, &pci_device_list, next) {
+ pci_dump_one_device(f, dev);
+ }
+}
+
+/* register a driver */
+void
+rte_eal_pci_register(struct rte_pci_driver *driver)
+{
+ TAILQ_INSERT_TAIL(&pci_driver_list, driver, next);
+}
+
+/* unregister a driver */
+void
+rte_eal_pci_unregister(struct rte_pci_driver *driver)
+{
+ TAILQ_REMOVE(&pci_driver_list, driver, next);
+}
diff --git a/lib/librte_eal/common/eal_common_pci_uio.c b/lib/librte_eal/common/eal_common_pci_uio.c
new file mode 100644
index 00000000..f062e81d
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_pci_uio.c
@@ -0,0 +1,222 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <fcntl.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+
+#include <rte_eal.h>
+#include <rte_tailq.h>
+#include <rte_log.h>
+#include <rte_malloc.h>
+
+#include "eal_private.h"
+
+static struct rte_tailq_elem rte_uio_tailq = {
+ .name = "UIO_RESOURCE_LIST",
+};
+EAL_REGISTER_TAILQ(rte_uio_tailq)
+
+static int
+pci_uio_map_secondary(struct rte_pci_device *dev)
+{
+ int fd, i;
+ struct mapped_pci_resource *uio_res;
+ struct mapped_pci_res_list *uio_res_list =
+ RTE_TAILQ_CAST(rte_uio_tailq.head, mapped_pci_res_list);
+
+ TAILQ_FOREACH(uio_res, uio_res_list, next) {
+
+ /* skip this element if it doesn't match our PCI address */
+ if (rte_eal_compare_pci_addr(&uio_res->pci_addr, &dev->addr))
+ continue;
+
+ for (i = 0; i != uio_res->nb_maps; i++) {
+ /*
+ * open devname, to mmap it
+ */
+ fd = open(uio_res->maps[i].path, O_RDWR);
+ if (fd < 0) {
+ RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
+ uio_res->maps[i].path, strerror(errno));
+ return -1;
+ }
+
+ void *mapaddr = pci_map_resource(uio_res->maps[i].addr,
+ fd, (off_t)uio_res->maps[i].offset,
+ (size_t)uio_res->maps[i].size, 0);
+ /* fd is not needed in slave process, close it */
+ close(fd);
+ if (mapaddr != uio_res->maps[i].addr) {
+ RTE_LOG(ERR, EAL,
+ "Cannot mmap device resource file %s to address: %p\n",
+ uio_res->maps[i].path,
+ uio_res->maps[i].addr);
+ return -1;
+ }
+ }
+ return 0;
+ }
+
+ RTE_LOG(ERR, EAL, "Cannot find resource for device\n");
+ return 1;
+}
+
+/* map the PCI resource of a PCI device in virtual memory */
+int
+pci_uio_map_resource(struct rte_pci_device *dev)
+{
+ int i, map_idx = 0, ret;
+ uint64_t phaddr;
+ struct mapped_pci_resource *uio_res = NULL;
+ struct mapped_pci_res_list *uio_res_list =
+ RTE_TAILQ_CAST(rte_uio_tailq.head, mapped_pci_res_list);
+
+ dev->intr_handle.fd = -1;
+ dev->intr_handle.uio_cfg_fd = -1;
+ dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
+
+ /* secondary processes - use already recorded details */
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return pci_uio_map_secondary(dev);
+
+ /* allocate uio resource */
+ ret = pci_uio_alloc_resource(dev, &uio_res);
+ if (ret)
+ return ret;
+
+ /* Map all BARs */
+ for (i = 0; i != PCI_MAX_RESOURCE; i++) {
+ /* skip empty BAR */
+ phaddr = dev->mem_resource[i].phys_addr;
+ if (phaddr == 0)
+ continue;
+
+ ret = pci_uio_map_resource_by_index(dev, i,
+ uio_res, map_idx);
+ if (ret)
+ goto error;
+
+ map_idx++;
+ }
+
+ uio_res->nb_maps = map_idx;
+
+ TAILQ_INSERT_TAIL(uio_res_list, uio_res, next);
+
+ return 0;
+error:
+ for (i = 0; i < map_idx; i++) {
+ pci_unmap_resource(uio_res->maps[i].addr,
+ (size_t)uio_res->maps[i].size);
+ rte_free(uio_res->maps[i].path);
+ }
+ pci_uio_free_resource(dev, uio_res);
+ return -1;
+}
+
+static void
+pci_uio_unmap(struct mapped_pci_resource *uio_res)
+{
+ int i;
+
+ if (uio_res == NULL)
+ return;
+
+ for (i = 0; i != uio_res->nb_maps; i++) {
+ pci_unmap_resource(uio_res->maps[i].addr,
+ (size_t)uio_res->maps[i].size);
+ rte_free(uio_res->maps[i].path);
+ }
+}
+
+static struct mapped_pci_resource *
+pci_uio_find_resource(struct rte_pci_device *dev)
+{
+ struct mapped_pci_resource *uio_res;
+ struct mapped_pci_res_list *uio_res_list =
+ RTE_TAILQ_CAST(rte_uio_tailq.head, mapped_pci_res_list);
+
+ if (dev == NULL)
+ return NULL;
+
+ TAILQ_FOREACH(uio_res, uio_res_list, next) {
+
+ /* skip this element if it doesn't match our PCI address */
+ if (!rte_eal_compare_pci_addr(&uio_res->pci_addr, &dev->addr))
+ return uio_res;
+ }
+ return NULL;
+}
+
+/* unmap the PCI resource of a PCI device in virtual memory */
+void
+pci_uio_unmap_resource(struct rte_pci_device *dev)
+{
+ struct mapped_pci_resource *uio_res;
+ struct mapped_pci_res_list *uio_res_list =
+ RTE_TAILQ_CAST(rte_uio_tailq.head, mapped_pci_res_list);
+
+ if (dev == NULL)
+ return;
+
+ /* find an entry for the device */
+ uio_res = pci_uio_find_resource(dev);
+ if (uio_res == NULL)
+ return;
+
+ /* secondary processes - just free maps */
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return pci_uio_unmap(uio_res);
+
+ TAILQ_REMOVE(uio_res_list, uio_res, next);
+
+ /* unmap all resources */
+ pci_uio_unmap(uio_res);
+
+ /* free uio resource */
+ rte_free(uio_res);
+
+ /* close fd if in primary process */
+ close(dev->intr_handle.fd);
+ if (dev->intr_handle.uio_cfg_fd >= 0) {
+ close(dev->intr_handle.uio_cfg_fd);
+ dev->intr_handle.uio_cfg_fd = -1;
+ }
+
+ dev->intr_handle.fd = -1;
+ dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
+}
diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c
new file mode 100644
index 00000000..12e0fcac
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_proc.c
@@ -0,0 +1,61 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <rte_eal.h>
+
+#include "eal_filesystem.h"
+#include "eal_internal_cfg.h"
+
+int
+rte_eal_primary_proc_alive(const char *config_file_path)
+{
+ int config_fd;
+
+ if (config_file_path)
+ config_fd = open(config_file_path, O_RDONLY);
+ else {
+ char default_path[PATH_MAX+1];
+ snprintf(default_path, PATH_MAX, RUNTIME_CONFIG_FMT,
+ default_config_dir, "rte");
+ config_fd = open(default_path, O_RDONLY);
+ }
+ if (config_fd < 0)
+ return 0;
+
+ int ret = lockf(config_fd, F_TEST, 0);
+ close(config_fd);
+
+ return !!ret;
+}
diff --git a/lib/librte_eal/common/eal_common_string_fns.c b/lib/librte_eal/common/eal_common_string_fns.c
new file mode 100644
index 00000000..125a3e2d
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_string_fns.c
@@ -0,0 +1,69 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <errno.h>
+
+#include <rte_string_fns.h>
+
+/* split string into tokens */
+int
+rte_strsplit(char *string, int stringlen,
+ char **tokens, int maxtokens, char delim)
+{
+ int i, tok = 0;
+ int tokstart = 1; /* first token is right at start of string */
+
+ if (string == NULL || tokens == NULL)
+ goto einval_error;
+
+ for (i = 0; i < stringlen; i++) {
+ if (string[i] == '\0' || tok >= maxtokens)
+ break;
+ if (tokstart) {
+ tokstart = 0;
+ tokens[tok++] = &string[i];
+ }
+ if (string[i] == delim) {
+ string[i] = '\0';
+ tokstart = 1;
+ }
+ }
+ return tok;
+
+einval_error:
+ errno = EINVAL;
+ return -1;
+}
diff --git a/lib/librte_eal/common/eal_common_tailqs.c b/lib/librte_eal/common/eal_common_tailqs.c
new file mode 100644
index 00000000..bb08ec8b
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_tailqs.c
@@ -0,0 +1,202 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/queue.h>
+#include <stdint.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <inttypes.h>
+
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_launch.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_memory.h>
+#include <rte_atomic.h>
+#include <rte_branch_prediction.h>
+#include <rte_log.h>
+#include <rte_string_fns.h>
+#include <rte_debug.h>
+
+#include "eal_private.h"
+
+TAILQ_HEAD(rte_tailq_elem_head, rte_tailq_elem);
+/* local tailq list */
+static struct rte_tailq_elem_head rte_tailq_elem_head =
+ TAILQ_HEAD_INITIALIZER(rte_tailq_elem_head);
+
+/* number of tailqs registered, -1 before call to rte_eal_tailqs_init */
+static int rte_tailqs_count = -1;
+
+struct rte_tailq_head *
+rte_eal_tailq_lookup(const char *name)
+{
+ unsigned i;
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+
+ if (name == NULL)
+ return NULL;
+
+ for (i = 0; i < RTE_MAX_TAILQ; i++) {
+ if (!strncmp(name, mcfg->tailq_head[i].name,
+ RTE_TAILQ_NAMESIZE-1))
+ return &mcfg->tailq_head[i];
+ }
+
+ return NULL;
+}
+
+void
+rte_dump_tailq(FILE *f)
+{
+ struct rte_mem_config *mcfg;
+ unsigned i = 0;
+
+ mcfg = rte_eal_get_configuration()->mem_config;
+
+ rte_rwlock_read_lock(&mcfg->qlock);
+ for (i = 0; i < RTE_MAX_TAILQ; i++) {
+ const struct rte_tailq_head *tailq = &mcfg->tailq_head[i];
+ const struct rte_tailq_entry_head *head = &tailq->tailq_head;
+
+ fprintf(f, "Tailq %u: qname:<%s>, tqh_first:%p, tqh_last:%p\n",
+ i, tailq->name, head->tqh_first, head->tqh_last);
+ }
+ rte_rwlock_read_unlock(&mcfg->qlock);
+}
+
+static struct rte_tailq_head *
+rte_eal_tailq_create(const char *name)
+{
+ struct rte_tailq_head *head = NULL;
+
+ if (!rte_eal_tailq_lookup(name) &&
+ (rte_tailqs_count + 1 < RTE_MAX_TAILQ)) {
+ struct rte_mem_config *mcfg;
+
+ mcfg = rte_eal_get_configuration()->mem_config;
+ head = &mcfg->tailq_head[rte_tailqs_count];
+ snprintf(head->name, sizeof(head->name) - 1, "%s", name);
+ TAILQ_INIT(&head->tailq_head);
+ rte_tailqs_count++;
+ }
+
+ return head;
+}
+
+/* local register, used to store "early" tailqs before rte_eal_init() and to
+ * ensure secondary process only registers tailqs once. */
+static int
+rte_eal_tailq_local_register(struct rte_tailq_elem *t)
+{
+ struct rte_tailq_elem *temp;
+
+ TAILQ_FOREACH(temp, &rte_tailq_elem_head, next) {
+ if (!strncmp(t->name, temp->name, sizeof(temp->name)))
+ return -1;
+ }
+
+ TAILQ_INSERT_TAIL(&rte_tailq_elem_head, t, next);
+ return 0;
+}
+
+static void
+rte_eal_tailq_update(struct rte_tailq_elem *t)
+{
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ /* primary process is the only one that creates */
+ t->head = rte_eal_tailq_create(t->name);
+ } else {
+ t->head = rte_eal_tailq_lookup(t->name);
+ }
+}
+
+int
+rte_eal_tailq_register(struct rte_tailq_elem *t)
+{
+ if (rte_eal_tailq_local_register(t) < 0) {
+ RTE_LOG(ERR, EAL,
+ "%s tailq is already registered\n", t->name);
+ goto error;
+ }
+
+ /* if a register happens after rte_eal_tailqs_init(), then we can update
+ * tailq head */
+ if (rte_tailqs_count >= 0) {
+ rte_eal_tailq_update(t);
+ if (t->head == NULL) {
+ RTE_LOG(ERR, EAL,
+ "Cannot initialize tailq: %s\n", t->name);
+ TAILQ_REMOVE(&rte_tailq_elem_head, t, next);
+ goto error;
+ }
+ }
+
+ return 0;
+
+error:
+ t->head = NULL;
+ return -1;
+}
+
+int
+rte_eal_tailqs_init(void)
+{
+ struct rte_tailq_elem *t;
+
+ rte_tailqs_count = 0;
+
+ TAILQ_FOREACH(t, &rte_tailq_elem_head, next) {
+ /* second part of register job for "early" tailqs, see
+ * rte_eal_tailq_register and EAL_REGISTER_TAILQ */
+ rte_eal_tailq_update(t);
+ if (t->head == NULL) {
+ RTE_LOG(ERR, EAL,
+ "Cannot initialize tailq: %s\n", t->name);
+ /* no need to TAILQ_REMOVE, we are going to panic in
+ * rte_eal_init() */
+ goto fail;
+ }
+ }
+
+ return 0;
+
+fail:
+ rte_dump_tailq(stderr);
+ return -1;
+}
diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
new file mode 100644
index 00000000..2405e93f
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_thread.c
@@ -0,0 +1,157 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <sched.h>
+#include <assert.h>
+#include <string.h>
+
+#include <rte_lcore.h>
+#include <rte_memory.h>
+#include <rte_log.h>
+
+#include "eal_thread.h"
+
+RTE_DECLARE_PER_LCORE(unsigned , _socket_id);
+
+unsigned rte_socket_id(void)
+{
+ return RTE_PER_LCORE(_socket_id);
+}
+
+int eal_cpuset_socket_id(rte_cpuset_t *cpusetp)
+{
+ unsigned cpu = 0;
+ int socket_id = SOCKET_ID_ANY;
+ int sid;
+
+ if (cpusetp == NULL)
+ return SOCKET_ID_ANY;
+
+ do {
+ if (!CPU_ISSET(cpu, cpusetp))
+ continue;
+
+ if (socket_id == SOCKET_ID_ANY)
+ socket_id = eal_cpu_socket_id(cpu);
+
+ sid = eal_cpu_socket_id(cpu);
+ if (socket_id != sid) {
+ socket_id = SOCKET_ID_ANY;
+ break;
+ }
+
+ } while (++cpu < RTE_MAX_LCORE);
+
+ return socket_id;
+}
+
+int
+rte_thread_set_affinity(rte_cpuset_t *cpusetp)
+{
+ int s;
+ unsigned lcore_id;
+ pthread_t tid;
+
+ tid = pthread_self();
+
+ s = pthread_setaffinity_np(tid, sizeof(rte_cpuset_t), cpusetp);
+ if (s != 0) {
+ RTE_LOG(ERR, EAL, "pthread_setaffinity_np failed\n");
+ return -1;
+ }
+
+ /* store socket_id in TLS for quick access */
+ RTE_PER_LCORE(_socket_id) =
+ eal_cpuset_socket_id(cpusetp);
+
+ /* store cpuset in TLS for quick access */
+ memmove(&RTE_PER_LCORE(_cpuset), cpusetp,
+ sizeof(rte_cpuset_t));
+
+ lcore_id = rte_lcore_id();
+ if (lcore_id != (unsigned)LCORE_ID_ANY) {
+ /* EAL thread will update lcore_config */
+ lcore_config[lcore_id].socket_id = RTE_PER_LCORE(_socket_id);
+ memmove(&lcore_config[lcore_id].cpuset, cpusetp,
+ sizeof(rte_cpuset_t));
+ }
+
+ return 0;
+}
+
+void
+rte_thread_get_affinity(rte_cpuset_t *cpusetp)
+{
+ assert(cpusetp);
+ memmove(cpusetp, &RTE_PER_LCORE(_cpuset),
+ sizeof(rte_cpuset_t));
+}
+
+int
+eal_thread_dump_affinity(char *str, unsigned size)
+{
+ rte_cpuset_t cpuset;
+ unsigned cpu;
+ int ret;
+ unsigned int out = 0;
+
+ rte_thread_get_affinity(&cpuset);
+
+ for (cpu = 0; cpu < RTE_MAX_LCORE; cpu++) {
+ if (!CPU_ISSET(cpu, &cpuset))
+ continue;
+
+ ret = snprintf(str + out,
+ size - out, "%u,", cpu);
+ if (ret < 0 || (unsigned)ret >= size - out) {
+ /* string will be truncated */
+ ret = -1;
+ goto exit;
+ }
+
+ out += ret;
+ }
+
+ ret = 0;
+exit:
+ /* remove the last separator */
+ if (out > 0)
+ str[out - 1] = '\0';
+
+ return ret;
+}
diff --git a/lib/librte_eal/common/eal_common_timer.c b/lib/librte_eal/common/eal_common_timer.c
new file mode 100644
index 00000000..c4227cd8
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_timer.c
@@ -0,0 +1,86 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <errno.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_cycles.h>
+
+#include "eal_private.h"
+
+/* The frequency of the RDTSC timer resolution */
+static uint64_t eal_tsc_resolution_hz;
+
+void
+rte_delay_us(unsigned us)
+{
+ const uint64_t start = rte_get_timer_cycles();
+ const uint64_t ticks = (uint64_t)us * rte_get_timer_hz() / 1E6;
+ while ((rte_get_timer_cycles() - start) < ticks)
+ rte_pause();
+}
+
+uint64_t
+rte_get_tsc_hz(void)
+{
+ return eal_tsc_resolution_hz;
+}
+
+static uint64_t
+estimate_tsc_freq(void)
+{
+ RTE_LOG(WARNING, EAL, "WARNING: TSC frequency estimated roughly"
+ " - clock timings may be less accurate.\n");
+ /* assume that the sleep(1) will sleep for 1 second */
+ uint64_t start = rte_rdtsc();
+ sleep(1);
+ return rte_rdtsc() - start;
+}
+
+void
+set_tsc_freq(void)
+{
+ uint64_t freq = get_tsc_freq();
+
+ if (!freq)
+ freq = estimate_tsc_freq();
+
+ RTE_LOG(DEBUG, EAL, "TSC frequency is ~%" PRIu64 " KHz\n", freq / 1000);
+ eal_tsc_resolution_hz = freq;
+}
diff --git a/lib/librte_eal/common/eal_filesystem.h b/lib/librte_eal/common/eal_filesystem.h
new file mode 100644
index 00000000..fdb4a70b
--- /dev/null
+++ b/lib/librte_eal/common/eal_filesystem.h
@@ -0,0 +1,118 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @file
+ * Stores functions and path defines for files and directories
+ * on the filesystem for Linux, that are used by the Linux EAL.
+ */
+
+#ifndef EAL_FILESYSTEM_H
+#define EAL_FILESYSTEM_H
+
+/** Path of rte config file. */
+#define RUNTIME_CONFIG_FMT "%s/.%s_config"
+
+#include <stdint.h>
+#include <limits.h>
+#include <unistd.h>
+#include <stdlib.h>
+
+#include <rte_string_fns.h>
+#include "eal_internal_cfg.h"
+
+static const char *default_config_dir = "/var/run";
+
+static inline const char *
+eal_runtime_config_path(void)
+{
+ static char buffer[PATH_MAX]; /* static so auto-zeroed */
+ const char *directory = default_config_dir;
+ const char *home_dir = getenv("HOME");
+
+ if (getuid() != 0 && home_dir != NULL)
+ directory = home_dir;
+ snprintf(buffer, sizeof(buffer) - 1, RUNTIME_CONFIG_FMT, directory,
+ internal_config.hugefile_prefix);
+ return buffer;
+}
+
+/** Path of hugepage info file. */
+#define HUGEPAGE_INFO_FMT "%s/.%s_hugepage_info"
+
+static inline const char *
+eal_hugepage_info_path(void)
+{
+ static char buffer[PATH_MAX]; /* static so auto-zeroed */
+ const char *directory = default_config_dir;
+ const char *home_dir = getenv("HOME");
+
+ if (getuid() != 0 && home_dir != NULL)
+ directory = home_dir;
+ snprintf(buffer, sizeof(buffer) - 1, HUGEPAGE_INFO_FMT, directory,
+ internal_config.hugefile_prefix);
+ return buffer;
+}
+
+/** String format for hugepage map files. */
+#define HUGEFILE_FMT "%s/%smap_%d"
+#define TEMP_HUGEFILE_FMT "%s/%smap_temp_%d"
+
+static inline const char *
+eal_get_hugefile_path(char *buffer, size_t buflen, const char *hugedir, int f_id)
+{
+ snprintf(buffer, buflen, HUGEFILE_FMT, hugedir,
+ internal_config.hugefile_prefix, f_id);
+ buffer[buflen - 1] = '\0';
+ return buffer;
+}
+
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+static inline const char *
+eal_get_hugefile_temp_path(char *buffer, size_t buflen, const char *hugedir, int f_id)
+{
+ snprintf(buffer, buflen, TEMP_HUGEFILE_FMT, hugedir,
+ internal_config.hugefile_prefix, f_id);
+ buffer[buflen - 1] = '\0';
+ return buffer;
+}
+#endif
+
+/** define the default filename prefix for the %s values above */
+#define HUGEFILE_PREFIX_DEFAULT "rte"
+
+/** Function to read a single numeric value from a file on the filesystem.
+ * Used to read information from files on /sys */
+int eal_parse_sysfs_value(const char *filename, unsigned long *val);
+
+#endif /* EAL_FILESYSTEM_H */
diff --git a/lib/librte_eal/common/eal_hugepages.h b/lib/librte_eal/common/eal_hugepages.h
new file mode 100644
index 00000000..38edac03
--- /dev/null
+++ b/lib/librte_eal/common/eal_hugepages.h
@@ -0,0 +1,67 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef EAL_HUGEPAGES_H
+#define EAL_HUGEPAGES_H
+
+#include <stddef.h>
+#include <stdint.h>
+#include <limits.h>
+
+#define MAX_HUGEPAGE_PATH PATH_MAX
+
+/**
+ * Structure used to store informations about hugepages that we mapped
+ * through the files in hugetlbfs.
+ */
+struct hugepage_file {
+ void *orig_va; /**< virtual addr of first mmap() */
+ void *final_va; /**< virtual addr of 2nd mmap() */
+ uint64_t physaddr; /**< physical addr */
+ size_t size; /**< the page size */
+ int socket_id; /**< NUMA socket ID */
+ int file_id; /**< the '%d' in HUGEFILE_FMT */
+ int memseg_id; /**< the memory segment to which page belongs */
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+ int repeated; /**< number of times the page size is repeated */
+#endif
+ char filepath[MAX_HUGEPAGE_PATH]; /**< path to backing file on filesystem */
+};
+
+/**
+ * Read the information from linux on what hugepages are available
+ * for the EAL to use
+ */
+int eal_hugepage_info_init(void);
+
+#endif /* EAL_HUGEPAGES_H */
diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
new file mode 100644
index 00000000..5f1367eb
--- /dev/null
+++ b/lib/librte_eal/common/eal_internal_cfg.h
@@ -0,0 +1,94 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @file
+ * Holds the structures for the eal internal configuration
+ */
+
+#ifndef EAL_INTERNAL_CFG_H
+#define EAL_INTERNAL_CFG_H
+
+#include <rte_eal.h>
+#include <rte_pci_dev_feature_defs.h>
+
+#define MAX_HUGEPAGE_SIZES 3 /**< support up to 3 page sizes */
+
+/*
+ * internal configuration structure for the number, size and
+ * mount points of hugepages
+ */
+struct hugepage_info {
+ uint64_t hugepage_sz; /**< size of a huge page */
+ const char *hugedir; /**< dir where hugetlbfs is mounted */
+ uint32_t num_pages[RTE_MAX_NUMA_NODES];
+ /**< number of hugepages of that size on each socket */
+ int lock_descriptor; /**< file descriptor for hugepage dir */
+};
+
+/**
+ * internal configuration
+ */
+struct internal_config {
+ volatile size_t memory; /**< amount of asked memory */
+ volatile unsigned force_nchannel; /**< force number of channels */
+ volatile unsigned force_nrank; /**< force number of ranks */
+ volatile unsigned no_hugetlbfs; /**< true to disable hugetlbfs */
+ unsigned hugepage_unlink; /**< true to unlink backing files */
+ volatile unsigned xen_dom0_support; /**< support app running on Xen Dom0*/
+ volatile unsigned no_pci; /**< true to disable PCI */
+ volatile unsigned no_hpet; /**< true to disable HPET */
+ volatile unsigned vmware_tsc_map; /**< true to use VMware TSC mapping
+ * instead of native TSC */
+ volatile unsigned no_shconf; /**< true if there is no shared config */
+ volatile unsigned create_uio_dev; /**< true to create /dev/uioX devices */
+ volatile enum rte_proc_type_t process_type; /**< multi-process proc type */
+ /** true to try allocating memory on specific sockets */
+ volatile unsigned force_sockets;
+ volatile uint64_t socket_mem[RTE_MAX_NUMA_NODES]; /**< amount of memory per socket */
+ uintptr_t base_virtaddr; /**< base address to try and reserve memory from */
+ volatile int syslog_facility; /**< facility passed to openlog() */
+ volatile uint32_t log_level; /**< default log level */
+ /** default interrupt mode for VFIO */
+ volatile enum rte_intr_mode vfio_intr_mode;
+ const char *hugefile_prefix; /**< the base filename of hugetlbfs files */
+ const char *hugepage_dir; /**< specific hugetlbfs directory to use */
+
+ unsigned num_hugepage_sizes; /**< how many sizes on this system */
+ struct hugepage_info hugepage_info[MAX_HUGEPAGE_SIZES];
+};
+extern struct internal_config internal_config; /**< Global EAL configuration. */
+
+void eal_reset_internal_config(struct internal_config *internal_cfg);
+
+#endif /* EAL_INTERNAL_CFG_H */
diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h
new file mode 100644
index 00000000..a881c62e
--- /dev/null
+++ b/lib/librte_eal/common/eal_options.h
@@ -0,0 +1,100 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2014 6WIND S.A.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef EAL_OPTIONS_H
+#define EAL_OPTIONS_H
+
+enum {
+ /* long options mapped to a short option */
+#define OPT_HELP "help"
+ OPT_HELP_NUM = 'h',
+#define OPT_PCI_BLACKLIST "pci-blacklist"
+ OPT_PCI_BLACKLIST_NUM = 'b',
+#define OPT_PCI_WHITELIST "pci-whitelist"
+ OPT_PCI_WHITELIST_NUM = 'w',
+
+ /* first long only option value must be >= 256, so that we won't
+ * conflict with short options */
+ OPT_LONG_MIN_NUM = 256,
+#define OPT_BASE_VIRTADDR "base-virtaddr"
+ OPT_BASE_VIRTADDR_NUM,
+#define OPT_CREATE_UIO_DEV "create-uio-dev"
+ OPT_CREATE_UIO_DEV_NUM,
+#define OPT_FILE_PREFIX "file-prefix"
+ OPT_FILE_PREFIX_NUM,
+#define OPT_HUGE_DIR "huge-dir"
+ OPT_HUGE_DIR_NUM,
+#define OPT_HUGE_UNLINK "huge-unlink"
+ OPT_HUGE_UNLINK_NUM,
+#define OPT_LCORES "lcores"
+ OPT_LCORES_NUM,
+#define OPT_LOG_LEVEL "log-level"
+ OPT_LOG_LEVEL_NUM,
+#define OPT_MASTER_LCORE "master-lcore"
+ OPT_MASTER_LCORE_NUM,
+#define OPT_PROC_TYPE "proc-type"
+ OPT_PROC_TYPE_NUM,
+#define OPT_NO_HPET "no-hpet"
+ OPT_NO_HPET_NUM,
+#define OPT_NO_HUGE "no-huge"
+ OPT_NO_HUGE_NUM,
+#define OPT_NO_PCI "no-pci"
+ OPT_NO_PCI_NUM,
+#define OPT_NO_SHCONF "no-shconf"
+ OPT_NO_SHCONF_NUM,
+#define OPT_SOCKET_MEM "socket-mem"
+ OPT_SOCKET_MEM_NUM,
+#define OPT_SYSLOG "syslog"
+ OPT_SYSLOG_NUM,
+#define OPT_VDEV "vdev"
+ OPT_VDEV_NUM,
+#define OPT_VFIO_INTR "vfio-intr"
+ OPT_VFIO_INTR_NUM,
+#define OPT_VMWARE_TSC_MAP "vmware-tsc-map"
+ OPT_VMWARE_TSC_MAP_NUM,
+#define OPT_XEN_DOM0 "xen-dom0"
+ OPT_XEN_DOM0_NUM,
+ OPT_LONG_MAX_NUM
+};
+
+extern const char eal_short_options[];
+extern const struct option eal_long_options[];
+
+int eal_parse_common_option(int opt, const char *argv,
+ struct internal_config *conf);
+int eal_adjust_config(struct internal_config *internal_cfg);
+int eal_check_common_options(struct internal_config *internal_cfg);
+void eal_common_usage(void);
+enum rte_proc_type_t eal_proc_type_detect(void);
+int eal_plugins_init(void);
+
+#endif /* EAL_OPTIONS_H */
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
new file mode 100644
index 00000000..2342fa16
--- /dev/null
+++ b/lib/librte_eal/common/eal_private.h
@@ -0,0 +1,331 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _EAL_PRIVATE_H_
+#define _EAL_PRIVATE_H_
+
+#include <stdio.h>
+#include <rte_pci.h>
+
+/**
+ * Initialize the memzone subsystem (private to eal).
+ *
+ * @return
+ * - 0 on success
+ * - Negative on error
+ */
+int rte_eal_memzone_init(void);
+
+/**
+ * Common log initialization function (private to eal).
+ *
+ * Called by environment-specific log initialization function to initialize
+ * log history.
+ *
+ * @param default_log
+ * The default log stream to be used.
+ * @return
+ * - 0 on success
+ * - Negative on error
+ */
+int rte_eal_common_log_init(FILE *default_log);
+
+/**
+ * Fill configuration with number of physical and logical processors
+ *
+ * This function is private to EAL.
+ *
+ * Parse /proc/cpuinfo to get the number of physical and logical
+ * processors on the machine.
+ *
+ * @return
+ * 0 on success, negative on error
+ */
+int rte_eal_cpu_init(void);
+
+/**
+ * Map memory
+ *
+ * This function is private to EAL.
+ *
+ * Fill configuration structure with these infos, and return 0 on success.
+ *
+ * @return
+ * 0 on success, negative on error
+ */
+int rte_eal_memory_init(void);
+
+/**
+ * Configure timers
+ *
+ * This function is private to EAL.
+ *
+ * Mmap memory areas used by HPET (high precision event timer) that will
+ * provide our time reference, and configure the TSC frequency also for it
+ * to be used as a reference.
+ *
+ * @return
+ * 0 on success, negative on error
+ */
+int rte_eal_timer_init(void);
+
+/**
+ * Init early logs
+ *
+ * This function is private to EAL.
+ *
+ * @return
+ * 0 on success, negative on error
+ */
+int rte_eal_log_early_init(void);
+
+/**
+ * Init the default log stream
+ *
+ * This function is private to EAL.
+ *
+ * @return
+ * 0 on success, negative on error
+ */
+int rte_eal_log_init(const char *id, int facility);
+
+/**
+ * Init the default log stream
+ *
+ * This function is private to EAL.
+ *
+ * @return
+ * 0 on success, negative on error
+ */
+int rte_eal_pci_init(void);
+
+#ifdef RTE_LIBRTE_IVSHMEM
+/**
+ * Init the memory from IVSHMEM devices
+ *
+ * This function is private to EAL.
+ *
+ * @return
+ * 0 on success, negative on error
+ */
+int rte_eal_ivshmem_init(void);
+
+/**
+ * Init objects in IVSHMEM devices
+ *
+ * This function is private to EAL.
+ *
+ * @return
+ * 0 on success, negative on error
+ */
+int rte_eal_ivshmem_obj_init(void);
+#endif
+
+struct rte_pci_driver;
+struct rte_pci_device;
+
+/**
+ * Unbind kernel driver for this device
+ *
+ * This function is private to EAL.
+ *
+ * @return
+ * 0 on success, negative on error
+ */
+int pci_unbind_kernel_driver(struct rte_pci_device *dev);
+
+/**
+ * Map the PCI resource of a PCI device in virtual memory
+ *
+ * This function is private to EAL.
+ *
+ * @return
+ * 0 on success, negative on error
+ */
+int pci_uio_map_resource(struct rte_pci_device *dev);
+
+/**
+ * Unmap the PCI resource of a PCI device
+ *
+ * This function is private to EAL.
+ */
+void pci_uio_unmap_resource(struct rte_pci_device *dev);
+
+/**
+ * Allocate uio resource for PCI device
+ *
+ * This function is private to EAL.
+ *
+ * @param dev
+ * PCI device to allocate uio resource
+ * @param uio_res
+ * Pointer to uio resource.
+ * If the function returns 0, the pointer will be filled.
+ * @return
+ * 0 on success, negative on error
+ */
+int pci_uio_alloc_resource(struct rte_pci_device *dev,
+ struct mapped_pci_resource **uio_res);
+
+/**
+ * Free uio resource for PCI device
+ *
+ * This function is private to EAL.
+ *
+ * @param dev
+ * PCI device to free uio resource
+ * @param uio_res
+ * Pointer to uio resource.
+ */
+void pci_uio_free_resource(struct rte_pci_device *dev,
+ struct mapped_pci_resource *uio_res);
+
+/**
+ * Map device memory to uio resource
+ *
+ * This function is private to EAL.
+ *
+ * @param dev
+ * PCI device that has memory information.
+ * @param res_idx
+ * Memory resource index of the PCI device.
+ * @param uio_res
+ * uio resource that will keep mapping information.
+ * @param map_idx
+ * Mapping information index of the uio resource.
+ * @return
+ * 0 on success, negative on error
+ */
+int pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx,
+ struct mapped_pci_resource *uio_res, int map_idx);
+
+/**
+ * Init tail queues for non-EAL library structures. This is to allow
+ * the rings, mempools, etc. lists to be shared among multiple processes
+ *
+ * This function is private to EAL
+ *
+ * @return
+ * 0 on success, negative on error
+ */
+int rte_eal_tailqs_init(void);
+
+/**
+ * Init interrupt handling.
+ *
+ * This function is private to EAL.
+ *
+ * @return
+ * 0 on success, negative on error
+ */
+int rte_eal_intr_init(void);
+
+/**
+ * Init alarm mechanism. This is to allow a callback be called after
+ * specific time.
+ *
+ * This function is private to EAL.
+ *
+ * @return
+ * 0 on success, negative on error
+ */
+int rte_eal_alarm_init(void);
+
+/**
+ * This function initialises any virtual devices
+ *
+ * This function is private to the EAL.
+ */
+int rte_eal_dev_init(void);
+
+/**
+ * Function is to check if the kernel module(like, vfio, vfio_iommu_type1,
+ * etc.) loaded.
+ *
+ * @param module_name
+ * The module's name which need to be checked
+ *
+ * @return
+ * -1 means some error happens(NULL pointer or open failure)
+ * 0 means the module not loaded
+ * 1 means the module loaded
+ */
+int rte_eal_check_module(const char *module_name);
+
+/**
+ * Get cpu core_id.
+ *
+ * This function is private to the EAL.
+ */
+unsigned eal_cpu_core_id(unsigned lcore_id);
+
+/**
+ * Check if cpu is present.
+ *
+ * This function is private to the EAL.
+ */
+int eal_cpu_detected(unsigned lcore_id);
+
+/**
+ * Set TSC frequency from precise value or estimation
+ *
+ * This function is private to the EAL.
+ */
+void set_tsc_freq(void);
+
+/**
+ * Get precise TSC frequency from system
+ *
+ * This function is private to the EAL.
+ */
+uint64_t get_tsc_freq(void);
+
+/**
+ * Prepare physical memory mapping
+ * i.e. hugepages on Linux and
+ * contigmem on BSD.
+ *
+ * This function is private to the EAL.
+ */
+int rte_eal_hugepage_init(void);
+
+/**
+ * Creates memory mapping in secondary process
+ * i.e. hugepages on Linux and
+ * contigmem on BSD.
+ *
+ * This function is private to the EAL.
+ */
+int rte_eal_hugepage_attach(void);
+
+#endif /* _EAL_PRIVATE_H_ */
diff --git a/lib/librte_eal/common/eal_thread.h b/lib/librte_eal/common/eal_thread.h
new file mode 100644
index 00000000..e4e76b9d
--- /dev/null
+++ b/lib/librte_eal/common/eal_thread.h
@@ -0,0 +1,100 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef EAL_THREAD_H
+#define EAL_THREAD_H
+
+#include <rte_lcore.h>
+
+/**
+ * basic loop of thread, called for each thread by eal_init().
+ *
+ * @param arg
+ * opaque pointer
+ */
+__attribute__((noreturn)) void *eal_thread_loop(void *arg);
+
+/**
+ * Init per-lcore info for master thread
+ *
+ * @param lcore_id
+ * identifier of master lcore
+ */
+void eal_thread_init_master(unsigned lcore_id);
+
+/**
+ * Get the NUMA socket id from cpu id.
+ * This function is private to EAL.
+ *
+ * @param cpu_id
+ * The logical process id.
+ * @return
+ * socket_id or SOCKET_ID_ANY
+ */
+unsigned eal_cpu_socket_id(unsigned cpu_id);
+
+/**
+ * Get the NUMA socket id from cpuset.
+ * This function is private to EAL.
+ *
+ * @param cpusetp
+ * The point to a valid cpu set.
+ * @return
+ * socket_id or SOCKET_ID_ANY
+ */
+int eal_cpuset_socket_id(rte_cpuset_t *cpusetp);
+
+/**
+ * Default buffer size to use with eal_thread_dump_affinity()
+ */
+#define RTE_CPU_AFFINITY_STR_LEN 256
+
+/**
+ * Dump the current pthread cpuset.
+ * This function is private to EAL.
+ *
+ * Note:
+ * If the dump size is greater than the size of given buffer,
+ * the string will be truncated and with '\0' at the end.
+ *
+ * @param str
+ * The string buffer the cpuset will dump to.
+ * @param size
+ * The string buffer size.
+ * @return
+ * 0 for success, -1 if truncation happens.
+ */
+int
+eal_thread_dump_affinity(char *str, unsigned size);
+
+#endif /* EAL_THREAD_H */
diff --git a/lib/librte_eal/common/include/arch/arm/rte_atomic.h b/lib/librte_eal/common/include/arch/arm/rte_atomic.h
new file mode 100644
index 00000000..454a12b0
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/arm/rte_atomic.h
@@ -0,0 +1,48 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 RehiveTech. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of RehiveTech nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_ATOMIC_ARM_H_
+#define _RTE_ATOMIC_ARM_H_
+
+#ifdef RTE_ARCH_64
+#include <rte_atomic_64.h>
+#else
+#include <rte_atomic_32.h>
+#endif
+
+#define rte_smp_mb() rte_mb()
+
+#define rte_smp_wmb() rte_wmb()
+
+#define rte_smp_rmb() rte_rmb()
+
+#endif /* _RTE_ATOMIC_ARM_H_ */
diff --git a/lib/librte_eal/common/include/arch/arm/rte_atomic_32.h b/lib/librte_eal/common/include/arch/arm/rte_atomic_32.h
new file mode 100644
index 00000000..9ae1e78b
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/arm/rte_atomic_32.h
@@ -0,0 +1,74 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 RehiveTech. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of RehiveTech nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_ATOMIC_ARM32_H_
+#define _RTE_ATOMIC_ARM32_H_
+
+#ifndef RTE_FORCE_INTRINSICS
+# error Platform must be built with CONFIG_RTE_FORCE_INTRINSICS
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_atomic.h"
+
+/**
+ * General memory barrier.
+ *
+ * Guarantees that the LOAD and STORE operations generated before the
+ * barrier occur before the LOAD and STORE operations generated after.
+ */
+#define rte_mb() __sync_synchronize()
+
+/**
+ * Write memory barrier.
+ *
+ * Guarantees that the STORE operations generated before the barrier
+ * occur before the STORE operations generated after.
+ */
+#define rte_wmb() do { asm volatile ("dmb st" : : : "memory"); } while (0)
+
+/**
+ * Read memory barrier.
+ *
+ * Guarantees that the LOAD operations generated before the barrier
+ * occur before the LOAD operations generated after.
+ */
+#define rte_rmb() __sync_synchronize()
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_ATOMIC_ARM32_H_ */
diff --git a/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h b/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h
new file mode 100644
index 00000000..671caa76
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h
@@ -0,0 +1,88 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) Cavium networks Ltd. 2015.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Cavium networks nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_ATOMIC_ARM64_H_
+#define _RTE_ATOMIC_ARM64_H_
+
+#ifndef RTE_FORCE_INTRINSICS
+# error Platform must be built with CONFIG_RTE_FORCE_INTRINSICS
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_atomic.h"
+
+#define dmb(opt) do { asm volatile("dmb " #opt : : : "memory"); } while (0)
+
+/**
+ * General memory barrier.
+ *
+ * Guarantees that the LOAD and STORE operations generated before the
+ * barrier occur before the LOAD and STORE operations generated after.
+ * This function is architecture dependent.
+ */
+static inline void rte_mb(void)
+{
+ dmb(ish);
+}
+
+/**
+ * Write memory barrier.
+ *
+ * Guarantees that the STORE operations generated before the barrier
+ * occur before the STORE operations generated after.
+ * This function is architecture dependent.
+ */
+static inline void rte_wmb(void)
+{
+ dmb(ishst);
+}
+
+/**
+ * Read memory barrier.
+ *
+ * Guarantees that the LOAD operations generated before the barrier
+ * occur before the LOAD operations generated after.
+ * This function is architecture dependent.
+ */
+static inline void rte_rmb(void)
+{
+ dmb(ishld);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_ATOMIC_ARM64_H_ */
diff --git a/lib/librte_eal/common/include/arch/arm/rte_byteorder.h b/lib/librte_eal/common/include/arch/arm/rte_byteorder.h
new file mode 100644
index 00000000..3f2dd1f2
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/arm/rte_byteorder.h
@@ -0,0 +1,107 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 RehiveTech. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of RehiveTech nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_BYTEORDER_ARM_H_
+#define _RTE_BYTEORDER_ARM_H_
+
+#ifndef RTE_FORCE_INTRINSICS
+# error Platform must be built with CONFIG_RTE_FORCE_INTRINSICS
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_byteorder.h"
+
+/* fix missing __builtin_bswap16 for gcc older then 4.8 */
+#if !(__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))
+
+static inline uint16_t rte_arch_bswap16(uint16_t _x)
+{
+ register uint16_t x = _x;
+
+ asm volatile ("rev16 %0,%1"
+ : "=r" (x)
+ : "r" (x)
+ );
+ return x;
+}
+
+#define rte_bswap16(x) ((uint16_t)(__builtin_constant_p(x) ? \
+ rte_constant_bswap16(x) : \
+ rte_arch_bswap16(x)))
+#endif
+
+/* ARM architecture is bi-endian (both big and little). */
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+
+#define rte_cpu_to_le_16(x) (x)
+#define rte_cpu_to_le_32(x) (x)
+#define rte_cpu_to_le_64(x) (x)
+
+#define rte_cpu_to_be_16(x) rte_bswap16(x)
+#define rte_cpu_to_be_32(x) rte_bswap32(x)
+#define rte_cpu_to_be_64(x) rte_bswap64(x)
+
+#define rte_le_to_cpu_16(x) (x)
+#define rte_le_to_cpu_32(x) (x)
+#define rte_le_to_cpu_64(x) (x)
+
+#define rte_be_to_cpu_16(x) rte_bswap16(x)
+#define rte_be_to_cpu_32(x) rte_bswap32(x)
+#define rte_be_to_cpu_64(x) rte_bswap64(x)
+
+#else /* RTE_BIG_ENDIAN */
+
+#define rte_cpu_to_le_16(x) rte_bswap16(x)
+#define rte_cpu_to_le_32(x) rte_bswap32(x)
+#define rte_cpu_to_le_64(x) rte_bswap64(x)
+
+#define rte_cpu_to_be_16(x) (x)
+#define rte_cpu_to_be_32(x) (x)
+#define rte_cpu_to_be_64(x) (x)
+
+#define rte_le_to_cpu_16(x) rte_bswap16(x)
+#define rte_le_to_cpu_32(x) rte_bswap32(x)
+#define rte_le_to_cpu_64(x) rte_bswap64(x)
+
+#define rte_be_to_cpu_16(x) (x)
+#define rte_be_to_cpu_32(x) (x)
+#define rte_be_to_cpu_64(x) (x)
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_BYTEORDER_ARM_H_ */
diff --git a/lib/librte_eal/common/include/arch/arm/rte_cpuflags.h b/lib/librte_eal/common/include/arch/arm/rte_cpuflags.h
new file mode 100644
index 00000000..b8f62889
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/arm/rte_cpuflags.h
@@ -0,0 +1,42 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 RehiveTech. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of RehiveTech nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_CPUFLAGS_ARM_H_
+#define _RTE_CPUFLAGS_ARM_H_
+
+#ifdef RTE_ARCH_64
+#include <rte_cpuflags_64.h>
+#else
+#include <rte_cpuflags_32.h>
+#endif
+
+#endif /* _RTE_CPUFLAGS_ARM_H_ */
diff --git a/lib/librte_eal/common/include/arch/arm/rte_cpuflags_32.h b/lib/librte_eal/common/include/arch/arm/rte_cpuflags_32.h
new file mode 100644
index 00000000..eb02d9b9
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/arm/rte_cpuflags_32.h
@@ -0,0 +1,82 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 RehiveTech. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of RehiveTech nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_CPUFLAGS_ARM32_H_
+#define _RTE_CPUFLAGS_ARM32_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Enumeration of all CPU features supported
+ */
+enum rte_cpu_flag_t {
+ RTE_CPUFLAG_SWP = 0,
+ RTE_CPUFLAG_HALF,
+ RTE_CPUFLAG_THUMB,
+ RTE_CPUFLAG_A26BIT,
+ RTE_CPUFLAG_FAST_MULT,
+ RTE_CPUFLAG_FPA,
+ RTE_CPUFLAG_VFP,
+ RTE_CPUFLAG_EDSP,
+ RTE_CPUFLAG_JAVA,
+ RTE_CPUFLAG_IWMMXT,
+ RTE_CPUFLAG_CRUNCH,
+ RTE_CPUFLAG_THUMBEE,
+ RTE_CPUFLAG_NEON,
+ RTE_CPUFLAG_VFPv3,
+ RTE_CPUFLAG_VFPv3D16,
+ RTE_CPUFLAG_TLS,
+ RTE_CPUFLAG_VFPv4,
+ RTE_CPUFLAG_IDIVA,
+ RTE_CPUFLAG_IDIVT,
+ RTE_CPUFLAG_VFPD32,
+ RTE_CPUFLAG_LPAE,
+ RTE_CPUFLAG_EVTSTRM,
+ RTE_CPUFLAG_AES,
+ RTE_CPUFLAG_PMULL,
+ RTE_CPUFLAG_SHA1,
+ RTE_CPUFLAG_SHA2,
+ RTE_CPUFLAG_CRC32,
+ RTE_CPUFLAG_V7L,
+ /* The last item */
+ RTE_CPUFLAG_NUMFLAGS,/**< This should always be the last! */
+};
+
+#include "generic/rte_cpuflags.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_CPUFLAGS_ARM32_H_ */
diff --git a/lib/librte_eal/common/include/arch/arm/rte_cpuflags_64.h b/lib/librte_eal/common/include/arch/arm/rte_cpuflags_64.h
new file mode 100644
index 00000000..49aead92
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/arm/rte_cpuflags_64.h
@@ -0,0 +1,64 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) Cavium networks Ltd. 2015.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Cavium networks nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_CPUFLAGS_ARM64_H_
+#define _RTE_CPUFLAGS_ARM64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Enumeration of all CPU features supported
+ */
+enum rte_cpu_flag_t {
+ RTE_CPUFLAG_FP = 0,
+ RTE_CPUFLAG_NEON,
+ RTE_CPUFLAG_EVTSTRM,
+ RTE_CPUFLAG_AES,
+ RTE_CPUFLAG_PMULL,
+ RTE_CPUFLAG_SHA1,
+ RTE_CPUFLAG_SHA2,
+ RTE_CPUFLAG_CRC32,
+ RTE_CPUFLAG_ATOMICS,
+ RTE_CPUFLAG_AARCH64,
+ /* The last item */
+ RTE_CPUFLAG_NUMFLAGS,/**< This should always be the last! */
+};
+
+#include "generic/rte_cpuflags.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_CPUFLAGS_ARM64_H_ */
diff --git a/lib/librte_eal/common/include/arch/arm/rte_cycles.h b/lib/librte_eal/common/include/arch/arm/rte_cycles.h
new file mode 100644
index 00000000..a8009a06
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/arm/rte_cycles.h
@@ -0,0 +1,42 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 RehiveTech. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of RehiveTech nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_CYCLES_ARM_H_
+#define _RTE_CYCLES_ARM_H_
+
+#ifdef RTE_ARCH_64
+#include <rte_cycles_64.h>
+#else
+#include <rte_cycles_32.h>
+#endif
+
+#endif /* _RTE_CYCLES_ARM_H_ */
diff --git a/lib/librte_eal/common/include/arch/arm/rte_cycles_32.h b/lib/librte_eal/common/include/arch/arm/rte_cycles_32.h
new file mode 100644
index 00000000..9c1be71e
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/arm/rte_cycles_32.h
@@ -0,0 +1,121 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 RehiveTech. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of RehiveTech nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_CYCLES_ARM32_H_
+#define _RTE_CYCLES_ARM32_H_
+
+/* ARM v7 does not have suitable source of clock signals. The only clock counter
+ available in the core is 32 bit wide. Therefore it is unsuitable as the
+ counter overlaps every few seconds and probably is not accessible by
+ userspace programs. Therefore we use clock_gettime(CLOCK_MONOTONIC_RAW) to
+ simulate counter running at 1GHz.
+*/
+
+#include <time.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_cycles.h"
+
+/**
+ * Read the time base register.
+ *
+ * @return
+ * The time base for this lcore.
+ */
+#ifndef RTE_ARM_EAL_RDTSC_USE_PMU
+
+/**
+ * This call is easily portable to any ARM architecture, however,
+ * it may be damn slow and inprecise for some tasks.
+ */
+static inline uint64_t
+__rte_rdtsc_syscall(void)
+{
+ struct timespec val;
+ uint64_t v;
+
+ while (clock_gettime(CLOCK_MONOTONIC_RAW, &val) != 0)
+ /* no body */;
+
+ v = (uint64_t) val.tv_sec * 1000000000LL;
+ v += (uint64_t) val.tv_nsec;
+ return v;
+}
+#define rte_rdtsc __rte_rdtsc_syscall
+
+#else
+
+/**
+ * This function requires to configure the PMCCNTR and enable
+ * userspace access to it:
+ *
+ * asm volatile("mcr p15, 0, %0, c9, c14, 0" : : "r"(1));
+ * asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(29));
+ * asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r"(0x8000000f));
+ *
+ * which is possible only from the priviledged mode (kernel space).
+ */
+static inline uint64_t
+__rte_rdtsc_pmccntr(void)
+{
+ unsigned tsc;
+ uint64_t final_tsc;
+
+ /* Read PMCCNTR */
+ asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(tsc));
+ /* 1 tick = 64 clocks */
+ final_tsc = ((uint64_t)tsc) << 6;
+
+ return (uint64_t)final_tsc;
+}
+#define rte_rdtsc __rte_rdtsc_pmccntr
+
+#endif /* RTE_ARM_EAL_RDTSC_USE_PMU */
+
+static inline uint64_t
+rte_rdtsc_precise(void)
+{
+ rte_mb();
+ return rte_rdtsc();
+}
+
+static inline uint64_t
+rte_get_tsc_cycles(void) { return rte_rdtsc(); }
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_CYCLES_ARM32_H_ */
diff --git a/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h b/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h
new file mode 100644
index 00000000..14f26120
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h
@@ -0,0 +1,71 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) Cavium networks Ltd. 2015.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Cavium networks nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_CYCLES_ARM64_H_
+#define _RTE_CYCLES_ARM64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_cycles.h"
+
+/**
+ * Read the time base register.
+ *
+ * @return
+ * The time base for this lcore.
+ */
+static inline uint64_t
+rte_rdtsc(void)
+{
+ uint64_t tsc;
+
+ asm volatile("mrs %0, cntvct_el0" : "=r" (tsc));
+ return tsc;
+}
+
+static inline uint64_t
+rte_rdtsc_precise(void)
+{
+ rte_mb();
+ return rte_rdtsc();
+}
+
+static inline uint64_t
+rte_get_tsc_cycles(void) { return rte_rdtsc(); }
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_CYCLES_ARM64_H_ */
diff --git a/lib/librte_eal/common/include/arch/arm/rte_memcpy.h b/lib/librte_eal/common/include/arch/arm/rte_memcpy.h
new file mode 100644
index 00000000..1d562c3f
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/arm/rte_memcpy.h
@@ -0,0 +1,42 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 RehiveTech. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of RehiveTech nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_MEMCPY_ARM_H_
+#define _RTE_MEMCPY_ARM_H_
+
+#ifdef RTE_ARCH_64
+#include <rte_memcpy_64.h>
+#else
+#include <rte_memcpy_32.h>
+#endif
+
+#endif /* _RTE_MEMCPY_ARM_H_ */
diff --git a/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h b/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h
new file mode 100644
index 00000000..988125b3
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h
@@ -0,0 +1,338 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 RehiveTech. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of RehiveTech nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_MEMCPY_ARM32_H_
+#define _RTE_MEMCPY_ARM32_H_
+
+#include <stdint.h>
+#include <string.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_memcpy.h"
+
+#ifdef RTE_ARCH_ARM_NEON_MEMCPY
+
+#ifndef RTE_MACHINE_CPUFLAG_NEON
+#error "Cannot optimize memcpy by NEON as the CPU seems to not support this"
+#endif
+
+/* ARM NEON Intrinsics are used to copy data */
+#include <arm_neon.h>
+
+static inline void
+rte_mov16(uint8_t *dst, const uint8_t *src)
+{
+ vst1q_u8(dst, vld1q_u8(src));
+}
+
+static inline void
+rte_mov32(uint8_t *dst, const uint8_t *src)
+{
+ asm volatile (
+ "vld1.8 {d0-d3}, [%0]\n\t"
+ "vst1.8 {d0-d3}, [%1]\n\t"
+ : "+r" (src), "+r" (dst)
+ : : "memory", "d0", "d1", "d2", "d3");
+}
+
+static inline void
+rte_mov48(uint8_t *dst, const uint8_t *src)
+{
+ asm volatile (
+ "vld1.8 {d0-d3}, [%0]!\n\t"
+ "vld1.8 {d4-d5}, [%0]\n\t"
+ "vst1.8 {d0-d3}, [%1]!\n\t"
+ "vst1.8 {d4-d5}, [%1]\n\t"
+ : "+r" (src), "+r" (dst)
+ :
+ : "memory", "d0", "d1", "d2", "d3", "d4", "d5");
+}
+
+static inline void
+rte_mov64(uint8_t *dst, const uint8_t *src)
+{
+ asm volatile (
+ "vld1.8 {d0-d3}, [%0]!\n\t"
+ "vld1.8 {d4-d7}, [%0]\n\t"
+ "vst1.8 {d0-d3}, [%1]!\n\t"
+ "vst1.8 {d4-d7}, [%1]\n\t"
+ : "+r" (src), "+r" (dst)
+ :
+ : "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7");
+}
+
+static inline void
+rte_mov128(uint8_t *dst, const uint8_t *src)
+{
+ asm volatile ("pld [%0, #64]" : : "r" (src));
+ asm volatile (
+ "vld1.8 {d0-d3}, [%0]!\n\t"
+ "vld1.8 {d4-d7}, [%0]!\n\t"
+ "vld1.8 {d8-d11}, [%0]!\n\t"
+ "vld1.8 {d12-d15}, [%0]\n\t"
+ "vst1.8 {d0-d3}, [%1]!\n\t"
+ "vst1.8 {d4-d7}, [%1]!\n\t"
+ "vst1.8 {d8-d11}, [%1]!\n\t"
+ "vst1.8 {d12-d15}, [%1]\n\t"
+ : "+r" (src), "+r" (dst)
+ :
+ : "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
+ "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15");
+}
+
+static inline void
+rte_mov256(uint8_t *dst, const uint8_t *src)
+{
+ asm volatile ("pld [%0, #64]" : : "r" (src));
+ asm volatile ("pld [%0, #128]" : : "r" (src));
+ asm volatile ("pld [%0, #192]" : : "r" (src));
+ asm volatile ("pld [%0, #256]" : : "r" (src));
+ asm volatile ("pld [%0, #320]" : : "r" (src));
+ asm volatile ("pld [%0, #384]" : : "r" (src));
+ asm volatile ("pld [%0, #448]" : : "r" (src));
+ asm volatile (
+ "vld1.8 {d0-d3}, [%0]!\n\t"
+ "vld1.8 {d4-d7}, [%0]!\n\t"
+ "vld1.8 {d8-d11}, [%0]!\n\t"
+ "vld1.8 {d12-d15}, [%0]!\n\t"
+ "vld1.8 {d16-d19}, [%0]!\n\t"
+ "vld1.8 {d20-d23}, [%0]!\n\t"
+ "vld1.8 {d24-d27}, [%0]!\n\t"
+ "vld1.8 {d28-d31}, [%0]\n\t"
+ "vst1.8 {d0-d3}, [%1]!\n\t"
+ "vst1.8 {d4-d7}, [%1]!\n\t"
+ "vst1.8 {d8-d11}, [%1]!\n\t"
+ "vst1.8 {d12-d15}, [%1]!\n\t"
+ "vst1.8 {d16-d19}, [%1]!\n\t"
+ "vst1.8 {d20-d23}, [%1]!\n\t"
+ "vst1.8 {d24-d27}, [%1]!\n\t"
+ "vst1.8 {d28-d31}, [%1]!\n\t"
+ : "+r" (src), "+r" (dst)
+ :
+ : "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
+ "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15",
+ "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
+ "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31");
+}
+
+#define rte_memcpy(dst, src, n) \
+ ({ (__builtin_constant_p(n)) ? \
+ memcpy((dst), (src), (n)) : \
+ rte_memcpy_func((dst), (src), (n)); })
+
+static inline void *
+rte_memcpy_func(void *dst, const void *src, size_t n)
+{
+ void *ret = dst;
+
+ /* We can't copy < 16 bytes using XMM registers so do it manually. */
+ if (n < 16) {
+ if (n & 0x01) {
+ *(uint8_t *)dst = *(const uint8_t *)src;
+ dst = (uint8_t *)dst + 1;
+ src = (const uint8_t *)src + 1;
+ }
+ if (n & 0x02) {
+ *(uint16_t *)dst = *(const uint16_t *)src;
+ dst = (uint16_t *)dst + 1;
+ src = (const uint16_t *)src + 1;
+ }
+ if (n & 0x04) {
+ *(uint32_t *)dst = *(const uint32_t *)src;
+ dst = (uint32_t *)dst + 1;
+ src = (const uint32_t *)src + 1;
+ }
+ if (n & 0x08) {
+ /* ARMv7 can not handle unaligned access to long long
+ * (uint64_t). Therefore two uint32_t operations are
+ * used.
+ */
+ *(uint32_t *)dst = *(const uint32_t *)src;
+ dst = (uint32_t *)dst + 1;
+ src = (const uint32_t *)src + 1;
+ *(uint32_t *)dst = *(const uint32_t *)src;
+ }
+ return ret;
+ }
+
+ /* Special fast cases for <= 128 bytes */
+ if (n <= 32) {
+ rte_mov16((uint8_t *)dst, (const uint8_t *)src);
+ rte_mov16((uint8_t *)dst - 16 + n,
+ (const uint8_t *)src - 16 + n);
+ return ret;
+ }
+
+ if (n <= 64) {
+ rte_mov32((uint8_t *)dst, (const uint8_t *)src);
+ rte_mov32((uint8_t *)dst - 32 + n,
+ (const uint8_t *)src - 32 + n);
+ return ret;
+ }
+
+ if (n <= 128) {
+ rte_mov64((uint8_t *)dst, (const uint8_t *)src);
+ rte_mov64((uint8_t *)dst - 64 + n,
+ (const uint8_t *)src - 64 + n);
+ return ret;
+ }
+
+ /*
+ * For large copies > 128 bytes. This combination of 256, 64 and 16 byte
+ * copies was found to be faster than doing 128 and 32 byte copies as
+ * well.
+ */
+ for ( ; n >= 256; n -= 256) {
+ rte_mov256((uint8_t *)dst, (const uint8_t *)src);
+ dst = (uint8_t *)dst + 256;
+ src = (const uint8_t *)src + 256;
+ }
+
+ /*
+ * We split the remaining bytes (which will be less than 256) into
+ * 64byte (2^6) chunks.
+ * Using incrementing integers in the case labels of a switch statement
+ * enourages the compiler to use a jump table. To get incrementing
+ * integers, we shift the 2 relevant bits to the LSB position to first
+ * get decrementing integers, and then subtract.
+ */
+ switch (3 - (n >> 6)) {
+ case 0x00:
+ rte_mov64((uint8_t *)dst, (const uint8_t *)src);
+ n -= 64;
+ dst = (uint8_t *)dst + 64;
+ src = (const uint8_t *)src + 64; /* fallthrough */
+ case 0x01:
+ rte_mov64((uint8_t *)dst, (const uint8_t *)src);
+ n -= 64;
+ dst = (uint8_t *)dst + 64;
+ src = (const uint8_t *)src + 64; /* fallthrough */
+ case 0x02:
+ rte_mov64((uint8_t *)dst, (const uint8_t *)src);
+ n -= 64;
+ dst = (uint8_t *)dst + 64;
+ src = (const uint8_t *)src + 64; /* fallthrough */
+ default:
+ break;
+ }
+
+ /*
+ * We split the remaining bytes (which will be less than 64) into
+ * 16byte (2^4) chunks, using the same switch structure as above.
+ */
+ switch (3 - (n >> 4)) {
+ case 0x00:
+ rte_mov16((uint8_t *)dst, (const uint8_t *)src);
+ n -= 16;
+ dst = (uint8_t *)dst + 16;
+ src = (const uint8_t *)src + 16; /* fallthrough */
+ case 0x01:
+ rte_mov16((uint8_t *)dst, (const uint8_t *)src);
+ n -= 16;
+ dst = (uint8_t *)dst + 16;
+ src = (const uint8_t *)src + 16; /* fallthrough */
+ case 0x02:
+ rte_mov16((uint8_t *)dst, (const uint8_t *)src);
+ n -= 16;
+ dst = (uint8_t *)dst + 16;
+ src = (const uint8_t *)src + 16; /* fallthrough */
+ default:
+ break;
+ }
+
+ /* Copy any remaining bytes, without going beyond end of buffers */
+ if (n != 0)
+ rte_mov16((uint8_t *)dst - 16 + n,
+ (const uint8_t *)src - 16 + n);
+ return ret;
+}
+
+#else
+
+static inline void
+rte_mov16(uint8_t *dst, const uint8_t *src)
+{
+ memcpy(dst, src, 16);
+}
+
+static inline void
+rte_mov32(uint8_t *dst, const uint8_t *src)
+{
+ memcpy(dst, src, 32);
+}
+
+static inline void
+rte_mov48(uint8_t *dst, const uint8_t *src)
+{
+ memcpy(dst, src, 48);
+}
+
+static inline void
+rte_mov64(uint8_t *dst, const uint8_t *src)
+{
+ memcpy(dst, src, 64);
+}
+
+static inline void
+rte_mov128(uint8_t *dst, const uint8_t *src)
+{
+ memcpy(dst, src, 128);
+}
+
+static inline void
+rte_mov256(uint8_t *dst, const uint8_t *src)
+{
+ memcpy(dst, src, 256);
+}
+
+static inline void *
+rte_memcpy(void *dst, const void *src, size_t n)
+{
+ return memcpy(dst, src, n);
+}
+
+static inline void *
+rte_memcpy_func(void *dst, const void *src, size_t n)
+{
+ return memcpy(dst, src, n);
+}
+
+#endif /* RTE_ARCH_ARM_NEON_MEMCPY */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MEMCPY_ARM32_H_ */
diff --git a/lib/librte_eal/common/include/arch/arm/rte_memcpy_64.h b/lib/librte_eal/common/include/arch/arm/rte_memcpy_64.h
new file mode 100644
index 00000000..917cdc1b
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/arm/rte_memcpy_64.h
@@ -0,0 +1,93 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) Cavium networks Ltd. 2015.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Cavium networks nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_MEMCPY_ARM64_H_
+#define _RTE_MEMCPY_ARM64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <string.h>
+
+#include "generic/rte_memcpy.h"
+
+static inline void
+rte_mov16(uint8_t *dst, const uint8_t *src)
+{
+ memcpy(dst, src, 16);
+}
+
+static inline void
+rte_mov32(uint8_t *dst, const uint8_t *src)
+{
+ memcpy(dst, src, 32);
+}
+
+static inline void
+rte_mov48(uint8_t *dst, const uint8_t *src)
+{
+ memcpy(dst, src, 48);
+}
+
+static inline void
+rte_mov64(uint8_t *dst, const uint8_t *src)
+{
+ memcpy(dst, src, 64);
+}
+
+static inline void
+rte_mov128(uint8_t *dst, const uint8_t *src)
+{
+ memcpy(dst, src, 128);
+}
+
+static inline void
+rte_mov256(uint8_t *dst, const uint8_t *src)
+{
+ memcpy(dst, src, 256);
+}
+
+#define rte_memcpy(d, s, n) memcpy((d), (s), (n))
+
+static inline void *
+rte_memcpy_func(void *dst, const void *src, size_t n)
+{
+ return memcpy(dst, src, n);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MEMCPY_ARM_64_H_ */
diff --git a/lib/librte_eal/common/include/arch/arm/rte_prefetch.h b/lib/librte_eal/common/include/arch/arm/rte_prefetch.h
new file mode 100644
index 00000000..aa37de57
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/arm/rte_prefetch.h
@@ -0,0 +1,42 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 RehiveTech. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of RehiveTech nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_PREFETCH_ARM_H_
+#define _RTE_PREFETCH_ARM_H_
+
+#ifdef RTE_ARCH_64
+#include <rte_prefetch_64.h>
+#else
+#include <rte_prefetch_32.h>
+#endif
+
+#endif /* _RTE_PREFETCH_ARM_H_ */
diff --git a/lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h b/lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h
new file mode 100644
index 00000000..5aeed22d
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h
@@ -0,0 +1,67 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 RehiveTech. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of RehiveTech nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_PREFETCH_ARM32_H_
+#define _RTE_PREFETCH_ARM32_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_prefetch.h"
+
+static inline void rte_prefetch0(const volatile void *p)
+{
+ asm volatile ("pld [%0]" : : "r" (p));
+}
+
+static inline void rte_prefetch1(const volatile void *p)
+{
+ asm volatile ("pld [%0]" : : "r" (p));
+}
+
+static inline void rte_prefetch2(const volatile void *p)
+{
+ asm volatile ("pld [%0]" : : "r" (p));
+}
+
+static inline void rte_prefetch_non_temporal(const volatile void *p)
+{
+ /* non-temporal version not available, fallback to rte_prefetch0 */
+ rte_prefetch0(p);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_PREFETCH_ARM32_H_ */
diff --git a/lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h b/lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h
new file mode 100644
index 00000000..3ed46a46
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h
@@ -0,0 +1,66 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) Cavium networks Ltd. 2015.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Cavium networks nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_PREFETCH_ARM_64_H_
+#define _RTE_PREFETCH_ARM_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_prefetch.h"
+
+static inline void rte_prefetch0(const volatile void *p)
+{
+ asm volatile ("PRFM PLDL1KEEP, [%0]" : : "r" (p));
+}
+
+static inline void rte_prefetch1(const volatile void *p)
+{
+ asm volatile ("PRFM PLDL2KEEP, [%0]" : : "r" (p));
+}
+
+static inline void rte_prefetch2(const volatile void *p)
+{
+ asm volatile ("PRFM PLDL3KEEP, [%0]" : : "r" (p));
+}
+
+static inline void rte_prefetch_non_temporal(const volatile void *p)
+{
+ asm volatile ("PRFM PLDL1STRM, [%0]" : : "r" (p));
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_PREFETCH_ARM_64_H_ */
diff --git a/lib/librte_eal/common/include/arch/arm/rte_rwlock.h b/lib/librte_eal/common/include/arch/arm/rte_rwlock.h
new file mode 100644
index 00000000..664bec88
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/arm/rte_rwlock.h
@@ -0,0 +1,40 @@
+/* copied from ppc_64 */
+
+#ifndef _RTE_RWLOCK_ARM_H_
+#define _RTE_RWLOCK_ARM_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_rwlock.h"
+
+static inline void
+rte_rwlock_read_lock_tm(rte_rwlock_t *rwl)
+{
+ rte_rwlock_read_lock(rwl);
+}
+
+static inline void
+rte_rwlock_read_unlock_tm(rte_rwlock_t *rwl)
+{
+ rte_rwlock_read_unlock(rwl);
+}
+
+static inline void
+rte_rwlock_write_lock_tm(rte_rwlock_t *rwl)
+{
+ rte_rwlock_write_lock(rwl);
+}
+
+static inline void
+rte_rwlock_write_unlock_tm(rte_rwlock_t *rwl)
+{
+ rte_rwlock_write_unlock(rwl);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_RWLOCK_ARM_H_ */
diff --git a/lib/librte_eal/common/include/arch/arm/rte_spinlock.h b/lib/librte_eal/common/include/arch/arm/rte_spinlock.h
new file mode 100644
index 00000000..396a42e8
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/arm/rte_spinlock.h
@@ -0,0 +1,92 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 RehiveTech. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of RehiveTech nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_SPINLOCK_ARM_H_
+#define _RTE_SPINLOCK_ARM_H_
+
+#ifndef RTE_FORCE_INTRINSICS
+# error Platform must be built with CONFIG_RTE_FORCE_INTRINSICS
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_common.h>
+#include "generic/rte_spinlock.h"
+
+static inline int rte_tm_supported(void)
+{
+ return 0;
+}
+
+static inline void
+rte_spinlock_lock_tm(rte_spinlock_t *sl)
+{
+ rte_spinlock_lock(sl); /* fall-back */
+}
+
+static inline int
+rte_spinlock_trylock_tm(rte_spinlock_t *sl)
+{
+ return rte_spinlock_trylock(sl);
+}
+
+static inline void
+rte_spinlock_unlock_tm(rte_spinlock_t *sl)
+{
+ rte_spinlock_unlock(sl);
+}
+
+static inline void
+rte_spinlock_recursive_lock_tm(rte_spinlock_recursive_t *slr)
+{
+ rte_spinlock_recursive_lock(slr); /* fall-back */
+}
+
+static inline void
+rte_spinlock_recursive_unlock_tm(rte_spinlock_recursive_t *slr)
+{
+ rte_spinlock_recursive_unlock(slr);
+}
+
+static inline int
+rte_spinlock_recursive_trylock_tm(rte_spinlock_recursive_t *slr)
+{
+ return rte_spinlock_recursive_trylock(slr);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_SPINLOCK_ARM_H_ */
diff --git a/lib/librte_eal/common/include/arch/arm/rte_vect.h b/lib/librte_eal/common/include/arch/arm/rte_vect.h
new file mode 100644
index 00000000..a33c0544
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/arm/rte_vect.h
@@ -0,0 +1,83 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 Cavium Networks. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Cavium Networks nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_VECT_ARM_H_
+#define _RTE_VECT_ARM_H_
+
+#include "arm_neon.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef int32x4_t xmm_t;
+
+#define XMM_SIZE (sizeof(xmm_t))
+#define XMM_MASK (XMM_SIZE - 1)
+
+typedef union rte_xmm {
+ xmm_t x;
+ uint8_t u8[XMM_SIZE / sizeof(uint8_t)];
+ uint16_t u16[XMM_SIZE / sizeof(uint16_t)];
+ uint32_t u32[XMM_SIZE / sizeof(uint32_t)];
+ uint64_t u64[XMM_SIZE / sizeof(uint64_t)];
+ double pd[XMM_SIZE / sizeof(double)];
+} __attribute__((aligned(16))) rte_xmm_t;
+
+#ifdef RTE_ARCH_ARM
+/* NEON intrinsic vqtbl1q_u8() is not supported in ARMv7-A(AArch32) */
+static __inline uint8x16_t
+vqtbl1q_u8(uint8x16_t a, uint8x16_t b)
+{
+ uint8_t i, pos;
+ rte_xmm_t rte_a, rte_b, rte_ret;
+
+ vst1q_u8(rte_a.u8, a);
+ vst1q_u8(rte_b.u8, b);
+
+ for (i = 0; i < 16; i++) {
+ pos = rte_b.u8[i];
+ if (pos < 16)
+ rte_ret.u8[i] = rte_a.u8[pos];
+ else
+ rte_ret.u8[i] = 0;
+ }
+
+ return vld1q_u8(rte_ret.u8);
+}
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h b/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h
new file mode 100644
index 00000000..feae4868
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h
@@ -0,0 +1,432 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) IBM Corporation 2014.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of IBM Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/*
+ * Inspired from FreeBSD src/sys/powerpc/include/atomic.h
+ * Copyright (c) 2008 Marcel Moolenaar
+ * Copyright (c) 2001 Benno Rice
+ * Copyright (c) 2001 David E. O'Brien
+ * Copyright (c) 1998 Doug Rabson
+ * All rights reserved.
+ */
+
+#ifndef _RTE_ATOMIC_PPC_64_H_
+#define _RTE_ATOMIC_PPC_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_atomic.h"
+
+/**
+ * General memory barrier.
+ *
+ * Guarantees that the LOAD and STORE operations generated before the
+ * barrier occur before the LOAD and STORE operations generated after.
+ */
+#define rte_mb() {asm volatile("sync" : : : "memory"); }
+
+/**
+ * Write memory barrier.
+ *
+ * Guarantees that the STORE operations generated before the barrier
+ * occur before the STORE operations generated after.
+ */
+#define rte_wmb() {asm volatile("sync" : : : "memory"); }
+
+/**
+ * Read memory barrier.
+ *
+ * Guarantees that the LOAD operations generated before the barrier
+ * occur before the LOAD operations generated after.
+ */
+#define rte_rmb() {asm volatile("sync" : : : "memory"); }
+
+#define rte_smp_mb() rte_mb()
+
+#define rte_smp_wmb() rte_compiler_barrier()
+
+#define rte_smp_rmb() rte_compiler_barrier()
+
+/*------------------------- 16 bit atomic operations -------------------------*/
+/* To be compatible with Power7, use GCC built-in functions for 16 bit
+ * operations */
+
+#ifndef RTE_FORCE_INTRINSICS
+static inline int
+rte_atomic16_cmpset(volatile uint16_t *dst, uint16_t exp, uint16_t src)
+{
+ return __atomic_compare_exchange(dst, &exp, &src, 0, __ATOMIC_ACQUIRE,
+ __ATOMIC_ACQUIRE) ? 1 : 0;
+}
+
+static inline int rte_atomic16_test_and_set(rte_atomic16_t *v)
+{
+ return rte_atomic16_cmpset((volatile uint16_t *)&v->cnt, 0, 1);
+}
+
+static inline void
+rte_atomic16_inc(rte_atomic16_t *v)
+{
+ __atomic_add_fetch(&v->cnt, 1, __ATOMIC_ACQUIRE);
+}
+
+static inline void
+rte_atomic16_dec(rte_atomic16_t *v)
+{
+ __atomic_sub_fetch(&v->cnt, 1, __ATOMIC_ACQUIRE);
+}
+
+static inline int rte_atomic16_inc_and_test(rte_atomic16_t *v)
+{
+ return __atomic_add_fetch(&v->cnt, 1, __ATOMIC_ACQUIRE) == 0;
+}
+
+static inline int rte_atomic16_dec_and_test(rte_atomic16_t *v)
+{
+ return __atomic_sub_fetch(&v->cnt, 1, __ATOMIC_ACQUIRE) == 0;
+}
+
+/*------------------------- 32 bit atomic operations -------------------------*/
+
+static inline int
+rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src)
+{
+ unsigned int ret = 0;
+
+ asm volatile(
+ "\tlwsync\n"
+ "1:\tlwarx %[ret], 0, %[dst]\n"
+ "cmplw %[exp], %[ret]\n"
+ "bne 2f\n"
+ "stwcx. %[src], 0, %[dst]\n"
+ "bne- 1b\n"
+ "li %[ret], 1\n"
+ "b 3f\n"
+ "2:\n"
+ "stwcx. %[ret], 0, %[dst]\n"
+ "li %[ret], 0\n"
+ "3:\n"
+ "isync\n"
+ : [ret] "=&r" (ret), "=m" (*dst)
+ : [dst] "r" (dst),
+ [exp] "r" (exp),
+ [src] "r" (src),
+ "m" (*dst)
+ : "cc", "memory");
+
+ return ret;
+}
+
+static inline int rte_atomic32_test_and_set(rte_atomic32_t *v)
+{
+ return rte_atomic32_cmpset((volatile uint32_t *)&v->cnt, 0, 1);
+}
+
+static inline void
+rte_atomic32_inc(rte_atomic32_t *v)
+{
+ int t;
+
+ asm volatile(
+ "1: lwarx %[t],0,%[cnt]\n"
+ "addic %[t],%[t],1\n"
+ "stwcx. %[t],0,%[cnt]\n"
+ "bne- 1b\n"
+ : [t] "=&r" (t), "=m" (v->cnt)
+ : [cnt] "r" (&v->cnt), "m" (v->cnt)
+ : "cc", "xer", "memory");
+}
+
+static inline void
+rte_atomic32_dec(rte_atomic32_t *v)
+{
+ int t;
+
+ asm volatile(
+ "1: lwarx %[t],0,%[cnt]\n"
+ "addic %[t],%[t],-1\n"
+ "stwcx. %[t],0,%[cnt]\n"
+ "bne- 1b\n"
+ : [t] "=&r" (t), "=m" (v->cnt)
+ : [cnt] "r" (&v->cnt), "m" (v->cnt)
+ : "cc", "xer", "memory");
+}
+
+static inline int rte_atomic32_inc_and_test(rte_atomic32_t *v)
+{
+ int ret;
+
+ asm volatile(
+ "\n\tlwsync\n"
+ "1: lwarx %[ret],0,%[cnt]\n"
+ "addic %[ret],%[ret],1\n"
+ "stwcx. %[ret],0,%[cnt]\n"
+ "bne- 1b\n"
+ "isync\n"
+ : [ret] "=&r" (ret)
+ : [cnt] "r" (&v->cnt)
+ : "cc", "xer", "memory");
+
+ return ret == 0;
+}
+
+static inline int rte_atomic32_dec_and_test(rte_atomic32_t *v)
+{
+ int ret;
+
+ asm volatile(
+ "\n\tlwsync\n"
+ "1: lwarx %[ret],0,%[cnt]\n"
+ "addic %[ret],%[ret],-1\n"
+ "stwcx. %[ret],0,%[cnt]\n"
+ "bne- 1b\n"
+ "isync\n"
+ : [ret] "=&r" (ret)
+ : [cnt] "r" (&v->cnt)
+ : "cc", "xer", "memory");
+
+ return ret == 0;
+}
+/*------------------------- 64 bit atomic operations -------------------------*/
+
+static inline int
+rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src)
+{
+ unsigned int ret = 0;
+
+ asm volatile (
+ "\tlwsync\n"
+ "1: ldarx %[ret], 0, %[dst]\n"
+ "cmpld %[exp], %[ret]\n"
+ "bne 2f\n"
+ "stdcx. %[src], 0, %[dst]\n"
+ "bne- 1b\n"
+ "li %[ret], 1\n"
+ "b 3f\n"
+ "2:\n"
+ "stdcx. %[ret], 0, %[dst]\n"
+ "li %[ret], 0\n"
+ "3:\n"
+ "isync\n"
+ : [ret] "=&r" (ret), "=m" (*dst)
+ : [dst] "r" (dst),
+ [exp] "r" (exp),
+ [src] "r" (src),
+ "m" (*dst)
+ : "cc", "memory");
+ return ret;
+}
+
+static inline void
+rte_atomic64_init(rte_atomic64_t *v)
+{
+ v->cnt = 0;
+}
+
+static inline int64_t
+rte_atomic64_read(rte_atomic64_t *v)
+{
+ long ret;
+
+ asm volatile("ld%U1%X1 %[ret],%[cnt]"
+ : [ret] "=r"(ret)
+ : [cnt] "m"(v->cnt));
+
+ return ret;
+}
+
+static inline void
+rte_atomic64_set(rte_atomic64_t *v, int64_t new_value)
+{
+ asm volatile("std%U0%X0 %[new_value],%[cnt]"
+ : [cnt] "=m"(v->cnt)
+ : [new_value] "r"(new_value));
+}
+
+static inline void
+rte_atomic64_add(rte_atomic64_t *v, int64_t inc)
+{
+ long t;
+
+ asm volatile(
+ "1: ldarx %[t],0,%[cnt]\n"
+ "add %[t],%[inc],%[t]\n"
+ "stdcx. %[t],0,%[cnt]\n"
+ "bne- 1b\n"
+ : [t] "=&r" (t), "=m" (v->cnt)
+ : [cnt] "r" (&v->cnt), [inc] "r" (inc), "m" (v->cnt)
+ : "cc", "memory");
+}
+
+static inline void
+rte_atomic64_sub(rte_atomic64_t *v, int64_t dec)
+{
+ long t;
+
+ asm volatile(
+ "1: ldarx %[t],0,%[cnt]\n"
+ "subf %[t],%[dec],%[t]\n"
+ "stdcx. %[t],0,%[cnt]\n"
+ "bne- 1b\n"
+ : [t] "=&r" (t), "+m" (v->cnt)
+ : [cnt] "r" (&v->cnt), [dec] "r" (dec), "m" (v->cnt)
+ : "cc", "memory");
+}
+
+static inline void
+rte_atomic64_inc(rte_atomic64_t *v)
+{
+ long t;
+
+ asm volatile(
+ "1: ldarx %[t],0,%[cnt]\n"
+ "addic %[t],%[t],1\n"
+ "stdcx. %[t],0,%[cnt]\n"
+ "bne- 1b\n"
+ : [t] "=&r" (t), "+m" (v->cnt)
+ : [cnt] "r" (&v->cnt), "m" (v->cnt)
+ : "cc", "xer", "memory");
+}
+
+static inline void
+rte_atomic64_dec(rte_atomic64_t *v)
+{
+ long t;
+
+ asm volatile(
+ "1: ldarx %[t],0,%[cnt]\n"
+ "addic %[t],%[t],-1\n"
+ "stdcx. %[t],0,%[cnt]\n"
+ "bne- 1b\n"
+ : [t] "=&r" (t), "+m" (v->cnt)
+ : [cnt] "r" (&v->cnt), "m" (v->cnt)
+ : "cc", "xer", "memory");
+}
+
+static inline int64_t
+rte_atomic64_add_return(rte_atomic64_t *v, int64_t inc)
+{
+ long ret;
+
+ asm volatile(
+ "\n\tlwsync\n"
+ "1: ldarx %[ret],0,%[cnt]\n"
+ "add %[ret],%[inc],%[ret]\n"
+ "stdcx. %[ret],0,%[cnt]\n"
+ "bne- 1b\n"
+ "isync\n"
+ : [ret] "=&r" (ret)
+ : [inc] "r" (inc), [cnt] "r" (&v->cnt)
+ : "cc", "memory");
+
+ return ret;
+}
+
+static inline int64_t
+rte_atomic64_sub_return(rte_atomic64_t *v, int64_t dec)
+{
+ long ret;
+
+ asm volatile(
+ "\n\tlwsync\n"
+ "1: ldarx %[ret],0,%[cnt]\n"
+ "subf %[ret],%[dec],%[ret]\n"
+ "stdcx. %[ret],0,%[cnt]\n"
+ "bne- 1b\n"
+ "isync\n"
+ : [ret] "=&r" (ret)
+ : [dec] "r" (dec), [cnt] "r" (&v->cnt)
+ : "cc", "memory");
+
+ return ret;
+}
+
+static inline int rte_atomic64_inc_and_test(rte_atomic64_t *v)
+{
+ long ret;
+
+ asm volatile(
+ "\n\tlwsync\n"
+ "1: ldarx %[ret],0,%[cnt]\n"
+ "addic %[ret],%[ret],1\n"
+ "stdcx. %[ret],0,%[cnt]\n"
+ "bne- 1b\n"
+ "isync\n"
+ : [ret] "=&r" (ret)
+ : [cnt] "r" (&v->cnt)
+ : "cc", "xer", "memory");
+
+ return ret == 0;
+}
+
+static inline int rte_atomic64_dec_and_test(rte_atomic64_t *v)
+{
+ long ret;
+
+ asm volatile(
+ "\n\tlwsync\n"
+ "1: ldarx %[ret],0,%[cnt]\n"
+ "addic %[ret],%[ret],-1\n"
+ "stdcx. %[ret],0,%[cnt]\n"
+ "bne- 1b\n"
+ "isync\n"
+ : [ret] "=&r" (ret)
+ : [cnt] "r" (&v->cnt)
+ : "cc", "xer", "memory");
+
+ return ret == 0;
+}
+
+static inline int rte_atomic64_test_and_set(rte_atomic64_t *v)
+{
+ return rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, 0, 1);
+}
+
+/**
+ * Atomically set a 64-bit counter to 0.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ */
+static inline void rte_atomic64_clear(rte_atomic64_t *v)
+{
+ v->cnt = 0;
+}
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_ATOMIC_PPC_64_H_ */
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_byteorder.h b/lib/librte_eal/common/include/arch/ppc_64/rte_byteorder.h
new file mode 100644
index 00000000..3c1734ed
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_byteorder.h
@@ -0,0 +1,149 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) IBM Corporation 2014.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of IBM Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* Inspired from FreeBSD src/sys/powerpc/include/endian.h
+ * Copyright (c) 1987, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+*/
+
+#ifndef _RTE_BYTEORDER_PPC_64_H_
+#define _RTE_BYTEORDER_PPC_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_byteorder.h"
+
+/*
+ * An architecture-optimized byte swap for a 16-bit value.
+ *
+ * Do not use this function directly. The preferred function is rte_bswap16().
+ */
+static inline uint16_t rte_arch_bswap16(uint16_t _x)
+{
+ return (_x >> 8) | ((_x << 8) & 0xff00);
+}
+
+/*
+ * An architecture-optimized byte swap for a 32-bit value.
+ *
+ * Do not use this function directly. The preferred function is rte_bswap32().
+ */
+static inline uint32_t rte_arch_bswap32(uint32_t _x)
+{
+ return (_x >> 24) | ((_x >> 8) & 0xff00) | ((_x << 8) & 0xff0000) |
+ ((_x << 24) & 0xff000000);
+}
+
+/*
+ * An architecture-optimized byte swap for a 64-bit value.
+ *
+ * Do not use this function directly. The preferred function is rte_bswap64().
+ */
+/* 64-bit mode */
+static inline uint64_t rte_arch_bswap64(uint64_t _x)
+{
+ return (_x >> 56) | ((_x >> 40) & 0xff00) | ((_x >> 24) & 0xff0000) |
+ ((_x >> 8) & 0xff000000) | ((_x << 8) & (0xffULL << 32)) |
+ ((_x << 24) & (0xffULL << 40)) |
+ ((_x << 40) & (0xffULL << 48)) | ((_x << 56));
+}
+
+#ifndef RTE_FORCE_INTRINSICS
+#define rte_bswap16(x) ((uint16_t)(__builtin_constant_p(x) ? \
+ rte_constant_bswap16(x) : \
+ rte_arch_bswap16(x)))
+
+#define rte_bswap32(x) ((uint32_t)(__builtin_constant_p(x) ? \
+ rte_constant_bswap32(x) : \
+ rte_arch_bswap32(x)))
+
+#define rte_bswap64(x) ((uint64_t)(__builtin_constant_p(x) ? \
+ rte_constant_bswap64(x) : \
+ rte_arch_bswap64(x)))
+#else
+/*
+ * __builtin_bswap16 is only available gcc 4.8 and upwards
+ */
+#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 8)
+#define rte_bswap16(x) ((uint16_t)(__builtin_constant_p(x) ? \
+ rte_constant_bswap16(x) : \
+ rte_arch_bswap16(x)))
+#endif
+#endif
+
+/* Power 8 have both little endian and big endian mode
+ * Power 7 only support big endian
+ */
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+
+#define rte_cpu_to_le_16(x) (x)
+#define rte_cpu_to_le_32(x) (x)
+#define rte_cpu_to_le_64(x) (x)
+
+#define rte_cpu_to_be_16(x) rte_bswap16(x)
+#define rte_cpu_to_be_32(x) rte_bswap32(x)
+#define rte_cpu_to_be_64(x) rte_bswap64(x)
+
+#define rte_le_to_cpu_16(x) (x)
+#define rte_le_to_cpu_32(x) (x)
+#define rte_le_to_cpu_64(x) (x)
+
+#define rte_be_to_cpu_16(x) rte_bswap16(x)
+#define rte_be_to_cpu_32(x) rte_bswap32(x)
+#define rte_be_to_cpu_64(x) rte_bswap64(x)
+
+#else /* RTE_BIG_ENDIAN */
+
+#define rte_cpu_to_le_16(x) rte_bswap16(x)
+#define rte_cpu_to_le_32(x) rte_bswap32(x)
+#define rte_cpu_to_le_64(x) rte_bswap64(x)
+
+#define rte_cpu_to_be_16(x) (x)
+#define rte_cpu_to_be_32(x) (x)
+#define rte_cpu_to_be_64(x) (x)
+
+#define rte_le_to_cpu_16(x) rte_bswap16(x)
+#define rte_le_to_cpu_32(x) rte_bswap32(x)
+#define rte_le_to_cpu_64(x) rte_bswap64(x)
+
+#define rte_be_to_cpu_16(x) (x)
+#define rte_be_to_cpu_32(x) (x)
+#define rte_be_to_cpu_64(x) (x)
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_BYTEORDER_PPC_64_H_ */
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_cpuflags.h b/lib/librte_eal/common/include/arch/ppc_64/rte_cpuflags.h
new file mode 100644
index 00000000..7cc2b3c5
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_cpuflags.h
@@ -0,0 +1,88 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) IBM Corporation 2014.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of IBM Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_CPUFLAGS_PPC_64_H_
+#define _RTE_CPUFLAGS_PPC_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Enumeration of all CPU features supported
+ */
+enum rte_cpu_flag_t {
+ RTE_CPUFLAG_PPC_LE = 0,
+ RTE_CPUFLAG_TRUE_LE,
+ RTE_CPUFLAG_PSERIES_PERFMON_COMPAT,
+ RTE_CPUFLAG_VSX,
+ RTE_CPUFLAG_ARCH_2_06,
+ RTE_CPUFLAG_POWER6_EXT,
+ RTE_CPUFLAG_DFP,
+ RTE_CPUFLAG_PA6T,
+ RTE_CPUFLAG_ARCH_2_05,
+ RTE_CPUFLAG_ICACHE_SNOOP,
+ RTE_CPUFLAG_SMT,
+ RTE_CPUFLAG_BOOKE,
+ RTE_CPUFLAG_CELLBE,
+ RTE_CPUFLAG_POWER5_PLUS,
+ RTE_CPUFLAG_POWER5,
+ RTE_CPUFLAG_POWER4,
+ RTE_CPUFLAG_NOTB,
+ RTE_CPUFLAG_EFP_DOUBLE,
+ RTE_CPUFLAG_EFP_SINGLE,
+ RTE_CPUFLAG_SPE,
+ RTE_CPUFLAG_UNIFIED_CACHE,
+ RTE_CPUFLAG_4xxMAC,
+ RTE_CPUFLAG_MMU,
+ RTE_CPUFLAG_FPU,
+ RTE_CPUFLAG_ALTIVEC,
+ RTE_CPUFLAG_PPC601,
+ RTE_CPUFLAG_PPC64,
+ RTE_CPUFLAG_PPC32,
+ RTE_CPUFLAG_TAR,
+ RTE_CPUFLAG_LSEL,
+ RTE_CPUFLAG_EBB,
+ RTE_CPUFLAG_DSCR,
+ RTE_CPUFLAG_HTM,
+ RTE_CPUFLAG_ARCH_2_07,
+ /* The last item */
+ RTE_CPUFLAG_NUMFLAGS,/**< This should always be the last! */
+};
+
+#include "generic/rte_cpuflags.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_CPUFLAGS_PPC_64_H_ */
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h b/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h
new file mode 100644
index 00000000..64beddf9
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h
@@ -0,0 +1,94 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) IBM Corporation 2014.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of IBM Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_CYCLES_PPC_64_H_
+#define _RTE_CYCLES_PPC_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_cycles.h"
+
+#include <rte_byteorder.h>
+
+/**
+ * Read the time base register.
+ *
+ * @return
+ * The time base for this lcore.
+ */
+static inline uint64_t
+rte_rdtsc(void)
+{
+ union {
+ uint64_t tsc_64;
+ struct {
+#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
+ uint32_t hi_32;
+ uint32_t lo_32;
+#else
+ uint32_t lo_32;
+ uint32_t hi_32;
+#endif
+ };
+ } tsc;
+ uint32_t tmp;
+
+ asm volatile(
+ "0:\n"
+ "mftbu %[hi32]\n"
+ "mftb %[lo32]\n"
+ "mftbu %[tmp]\n"
+ "cmpw %[tmp],%[hi32]\n"
+ "bne 0b\n"
+ : [hi32] "=r"(tsc.hi_32), [lo32] "=r"(tsc.lo_32),
+ [tmp] "=r"(tmp)
+ );
+ return tsc.tsc_64;
+}
+
+static inline uint64_t
+rte_rdtsc_precise(void)
+{
+ rte_mb();
+ return rte_rdtsc();
+}
+
+static inline uint64_t
+rte_get_tsc_cycles(void) { return rte_rdtsc(); }
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_CYCLES_PPC_64_H_ */
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h b/lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h
new file mode 100644
index 00000000..acf7aac2
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h
@@ -0,0 +1,225 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) IBM Corporation 2014.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of IBM Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_MEMCPY_PPC_64_H_
+#define _RTE_MEMCPY_PPC_64_H_
+
+#include <stdint.h>
+#include <string.h>
+/*To include altivec.h, GCC version must >= 4.8 */
+#include <altivec.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_memcpy.h"
+
+static inline void
+rte_mov16(uint8_t *dst, const uint8_t *src)
+{
+ vec_vsx_st(vec_vsx_ld(0, src), 0, dst);
+}
+
+static inline void
+rte_mov32(uint8_t *dst, const uint8_t *src)
+{
+ vec_vsx_st(vec_vsx_ld(0, src), 0, dst);
+ vec_vsx_st(vec_vsx_ld(16, src), 16, dst);
+}
+
+static inline void
+rte_mov48(uint8_t *dst, const uint8_t *src)
+{
+ vec_vsx_st(vec_vsx_ld(0, src), 0, dst);
+ vec_vsx_st(vec_vsx_ld(16, src), 16, dst);
+ vec_vsx_st(vec_vsx_ld(32, src), 32, dst);
+}
+
+static inline void
+rte_mov64(uint8_t *dst, const uint8_t *src)
+{
+ vec_vsx_st(vec_vsx_ld(0, src), 0, dst);
+ vec_vsx_st(vec_vsx_ld(16, src), 16, dst);
+ vec_vsx_st(vec_vsx_ld(32, src), 32, dst);
+ vec_vsx_st(vec_vsx_ld(48, src), 48, dst);
+}
+
+static inline void
+rte_mov128(uint8_t *dst, const uint8_t *src)
+{
+ vec_vsx_st(vec_vsx_ld(0, src), 0, dst);
+ vec_vsx_st(vec_vsx_ld(16, src), 16, dst);
+ vec_vsx_st(vec_vsx_ld(32, src), 32, dst);
+ vec_vsx_st(vec_vsx_ld(48, src), 48, dst);
+ vec_vsx_st(vec_vsx_ld(64, src), 64, dst);
+ vec_vsx_st(vec_vsx_ld(80, src), 80, dst);
+ vec_vsx_st(vec_vsx_ld(96, src), 96, dst);
+ vec_vsx_st(vec_vsx_ld(112, src), 112, dst);
+}
+
+static inline void
+rte_mov256(uint8_t *dst, const uint8_t *src)
+{
+ rte_mov128(dst, src);
+ rte_mov128(dst + 128, src + 128);
+}
+
+#define rte_memcpy(dst, src, n) \
+ ({ (__builtin_constant_p(n)) ? \
+ memcpy((dst), (src), (n)) : \
+ rte_memcpy_func((dst), (src), (n)); })
+
+static inline void *
+rte_memcpy_func(void *dst, const void *src, size_t n)
+{
+ void *ret = dst;
+
+ /* We can't copy < 16 bytes using XMM registers so do it manually. */
+ if (n < 16) {
+ if (n & 0x01) {
+ *(uint8_t *)dst = *(const uint8_t *)src;
+ dst = (uint8_t *)dst + 1;
+ src = (const uint8_t *)src + 1;
+ }
+ if (n & 0x02) {
+ *(uint16_t *)dst = *(const uint16_t *)src;
+ dst = (uint16_t *)dst + 1;
+ src = (const uint16_t *)src + 1;
+ }
+ if (n & 0x04) {
+ *(uint32_t *)dst = *(const uint32_t *)src;
+ dst = (uint32_t *)dst + 1;
+ src = (const uint32_t *)src + 1;
+ }
+ if (n & 0x08)
+ *(uint64_t *)dst = *(const uint64_t *)src;
+ return ret;
+ }
+
+ /* Special fast cases for <= 128 bytes */
+ if (n <= 32) {
+ rte_mov16((uint8_t *)dst, (const uint8_t *)src);
+ rte_mov16((uint8_t *)dst - 16 + n,
+ (const uint8_t *)src - 16 + n);
+ return ret;
+ }
+
+ if (n <= 64) {
+ rte_mov32((uint8_t *)dst, (const uint8_t *)src);
+ rte_mov32((uint8_t *)dst - 32 + n,
+ (const uint8_t *)src - 32 + n);
+ return ret;
+ }
+
+ if (n <= 128) {
+ rte_mov64((uint8_t *)dst, (const uint8_t *)src);
+ rte_mov64((uint8_t *)dst - 64 + n,
+ (const uint8_t *)src - 64 + n);
+ return ret;
+ }
+
+ /*
+ * For large copies > 128 bytes. This combination of 256, 64 and 16 byte
+ * copies was found to be faster than doing 128 and 32 byte copies as
+ * well.
+ */
+ for ( ; n >= 256; n -= 256) {
+ rte_mov256((uint8_t *)dst, (const uint8_t *)src);
+ dst = (uint8_t *)dst + 256;
+ src = (const uint8_t *)src + 256;
+ }
+
+ /*
+ * We split the remaining bytes (which will be less than 256) into
+ * 64byte (2^6) chunks.
+ * Using incrementing integers in the case labels of a switch statement
+ * enourages the compiler to use a jump table. To get incrementing
+ * integers, we shift the 2 relevant bits to the LSB position to first
+ * get decrementing integers, and then subtract.
+ */
+ switch (3 - (n >> 6)) {
+ case 0x00:
+ rte_mov64((uint8_t *)dst, (const uint8_t *)src);
+ n -= 64;
+ dst = (uint8_t *)dst + 64;
+ src = (const uint8_t *)src + 64; /* fallthrough */
+ case 0x01:
+ rte_mov64((uint8_t *)dst, (const uint8_t *)src);
+ n -= 64;
+ dst = (uint8_t *)dst + 64;
+ src = (const uint8_t *)src + 64; /* fallthrough */
+ case 0x02:
+ rte_mov64((uint8_t *)dst, (const uint8_t *)src);
+ n -= 64;
+ dst = (uint8_t *)dst + 64;
+ src = (const uint8_t *)src + 64; /* fallthrough */
+ default:
+ ;
+ }
+
+ /*
+ * We split the remaining bytes (which will be less than 64) into
+ * 16byte (2^4) chunks, using the same switch structure as above.
+ */
+ switch (3 - (n >> 4)) {
+ case 0x00:
+ rte_mov16((uint8_t *)dst, (const uint8_t *)src);
+ n -= 16;
+ dst = (uint8_t *)dst + 16;
+ src = (const uint8_t *)src + 16; /* fallthrough */
+ case 0x01:
+ rte_mov16((uint8_t *)dst, (const uint8_t *)src);
+ n -= 16;
+ dst = (uint8_t *)dst + 16;
+ src = (const uint8_t *)src + 16; /* fallthrough */
+ case 0x02:
+ rte_mov16((uint8_t *)dst, (const uint8_t *)src);
+ n -= 16;
+ dst = (uint8_t *)dst + 16;
+ src = (const uint8_t *)src + 16; /* fallthrough */
+ default:
+ ;
+ }
+
+ /* Copy any remaining bytes, without going beyond end of buffers */
+ if (n != 0)
+ rte_mov16((uint8_t *)dst - 16 + n,
+ (const uint8_t *)src - 16 + n);
+ return ret;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MEMCPY_PPC_64_H_ */
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h b/lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h
new file mode 100644
index 00000000..9a1995ea
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h
@@ -0,0 +1,67 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) IBM Corporation 2014.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of IBM Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_PREFETCH_PPC_64_H_
+#define _RTE_PREFETCH_PPC_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_prefetch.h"
+
+static inline void rte_prefetch0(const volatile void *p)
+{
+ asm volatile ("dcbt 0,%[p],0" : : [p] "r" (p));
+}
+
+static inline void rte_prefetch1(const volatile void *p)
+{
+ asm volatile ("dcbt 0,%[p],0" : : [p] "r" (p));
+}
+
+static inline void rte_prefetch2(const volatile void *p)
+{
+ asm volatile ("dcbt 0,%[p],0" : : [p] "r" (p));
+}
+
+static inline void rte_prefetch_non_temporal(const volatile void *p)
+{
+ /* non-temporal version not available, fallback to rte_prefetch0 */
+ rte_prefetch0(p);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_PREFETCH_PPC_64_H_ */
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_rwlock.h b/lib/librte_eal/common/include/arch/ppc_64/rte_rwlock.h
new file mode 100644
index 00000000..de8af19e
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_rwlock.h
@@ -0,0 +1,38 @@
+#ifndef _RTE_RWLOCK_PPC_64_H_
+#define _RTE_RWLOCK_PPC_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_rwlock.h"
+
+static inline void
+rte_rwlock_read_lock_tm(rte_rwlock_t *rwl)
+{
+ rte_rwlock_read_lock(rwl);
+}
+
+static inline void
+rte_rwlock_read_unlock_tm(rte_rwlock_t *rwl)
+{
+ rte_rwlock_read_unlock(rwl);
+}
+
+static inline void
+rte_rwlock_write_lock_tm(rte_rwlock_t *rwl)
+{
+ rte_rwlock_write_lock(rwl);
+}
+
+static inline void
+rte_rwlock_write_unlock_tm(rte_rwlock_t *rwl)
+{
+ rte_rwlock_write_unlock(rwl);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_RWLOCK_PPC_64_H_ */
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_spinlock.h b/lib/librte_eal/common/include/arch/ppc_64/rte_spinlock.h
new file mode 100644
index 00000000..af139c9d
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_spinlock.h
@@ -0,0 +1,114 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) IBM Corporation 2014.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of IBM Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_SPINLOCK_PPC_64_H_
+#define _RTE_SPINLOCK_PPC_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_common.h>
+#include "generic/rte_spinlock.h"
+
+/* Fixme: Use intrinsics to implement the spinlock on Power architecture */
+
+#ifndef RTE_FORCE_INTRINSICS
+
+static inline void
+rte_spinlock_lock(rte_spinlock_t *sl)
+{
+ while (__sync_lock_test_and_set(&sl->locked, 1))
+ while (sl->locked)
+ rte_pause();
+}
+
+static inline void
+rte_spinlock_unlock(rte_spinlock_t *sl)
+{
+ __sync_lock_release(&sl->locked);
+}
+
+static inline int
+rte_spinlock_trylock(rte_spinlock_t *sl)
+{
+ return __sync_lock_test_and_set(&sl->locked, 1) == 0;
+}
+
+#endif
+
+static inline int rte_tm_supported(void)
+{
+ return 0;
+}
+
+static inline void
+rte_spinlock_lock_tm(rte_spinlock_t *sl)
+{
+ rte_spinlock_lock(sl); /* fall-back */
+}
+
+static inline int
+rte_spinlock_trylock_tm(rte_spinlock_t *sl)
+{
+ return rte_spinlock_trylock(sl);
+}
+
+static inline void
+rte_spinlock_unlock_tm(rte_spinlock_t *sl)
+{
+ rte_spinlock_unlock(sl);
+}
+
+static inline void
+rte_spinlock_recursive_lock_tm(rte_spinlock_recursive_t *slr)
+{
+ rte_spinlock_recursive_lock(slr); /* fall-back */
+}
+
+static inline void
+rte_spinlock_recursive_unlock_tm(rte_spinlock_recursive_t *slr)
+{
+ rte_spinlock_recursive_unlock(slr);
+}
+
+static inline int
+rte_spinlock_recursive_trylock_tm(rte_spinlock_recursive_t *slr)
+{
+ return rte_spinlock_recursive_trylock(slr);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_SPINLOCK_PPC_64_H_ */
diff --git a/lib/librte_eal/common/include/arch/tile/rte_atomic.h b/lib/librte_eal/common/include/arch/tile/rte_atomic.h
new file mode 100644
index 00000000..28825ff6
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/tile/rte_atomic.h
@@ -0,0 +1,92 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) EZchip Semiconductor Ltd. 2015.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of EZchip Semiconductor nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_ATOMIC_TILE_H_
+#define _RTE_ATOMIC_TILE_H_
+
+#ifndef RTE_FORCE_INTRINSICS
+# error Platform must be built with CONFIG_RTE_FORCE_INTRINSICS
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_atomic.h"
+
+/**
+ * General memory barrier.
+ *
+ * Guarantees that the LOAD and STORE operations generated before the
+ * barrier occur before the LOAD and STORE operations generated after.
+ * This function is architecture dependent.
+ */
+static inline void rte_mb(void)
+{
+ __sync_synchronize();
+}
+
+/**
+ * Write memory barrier.
+ *
+ * Guarantees that the STORE operations generated before the barrier
+ * occur before the STORE operations generated after.
+ * This function is architecture dependent.
+ */
+static inline void rte_wmb(void)
+{
+ __sync_synchronize();
+}
+
+/**
+ * Read memory barrier.
+ *
+ * Guarantees that the LOAD operations generated before the barrier
+ * occur before the LOAD operations generated after.
+ * This function is architecture dependent.
+ */
+static inline void rte_rmb(void)
+{
+ __sync_synchronize();
+}
+
+#define rte_smp_mb() rte_mb()
+
+#define rte_smp_wmb() rte_compiler_barrier()
+
+#define rte_smp_rmb() rte_compiler_barrier()
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_ATOMIC_TILE_H_ */
diff --git a/lib/librte_eal/common/include/arch/tile/rte_byteorder.h b/lib/librte_eal/common/include/arch/tile/rte_byteorder.h
new file mode 100644
index 00000000..7239e437
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/tile/rte_byteorder.h
@@ -0,0 +1,91 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) EZchip Semiconductor Ltd. 2015.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of EZchip Semiconductor nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_BYTEORDER_TILE_H_
+#define _RTE_BYTEORDER_TILE_H_
+
+#ifndef RTE_FORCE_INTRINSICS
+# error Platform must be built with CONFIG_RTE_FORCE_INTRINSICS
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_byteorder.h"
+
+#if !(__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))
+#define rte_bswap16(x) rte_constant_bswap16(x)
+#endif
+
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+
+#define rte_cpu_to_le_16(x) (x)
+#define rte_cpu_to_le_32(x) (x)
+#define rte_cpu_to_le_64(x) (x)
+
+#define rte_cpu_to_be_16(x) rte_bswap16(x)
+#define rte_cpu_to_be_32(x) rte_bswap32(x)
+#define rte_cpu_to_be_64(x) rte_bswap64(x)
+
+#define rte_le_to_cpu_16(x) (x)
+#define rte_le_to_cpu_32(x) (x)
+#define rte_le_to_cpu_64(x) (x)
+
+#define rte_be_to_cpu_16(x) rte_bswap16(x)
+#define rte_be_to_cpu_32(x) rte_bswap32(x)
+#define rte_be_to_cpu_64(x) rte_bswap64(x)
+
+#else /* RTE_BIG_ENDIAN */
+
+#define rte_cpu_to_le_16(x) rte_bswap16(x)
+#define rte_cpu_to_le_32(x) rte_bswap32(x)
+#define rte_cpu_to_le_64(x) rte_bswap64(x)
+
+#define rte_cpu_to_be_16(x) (x)
+#define rte_cpu_to_be_32(x) (x)
+#define rte_cpu_to_be_64(x) (x)
+
+#define rte_le_to_cpu_16(x) rte_bswap16(x)
+#define rte_le_to_cpu_32(x) rte_bswap32(x)
+#define rte_le_to_cpu_64(x) rte_bswap64(x)
+
+#define rte_be_to_cpu_16(x) (x)
+#define rte_be_to_cpu_32(x) (x)
+#define rte_be_to_cpu_64(x) (x)
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_BYTEORDER_TILE_H_ */
diff --git a/lib/librte_eal/common/include/arch/tile/rte_cpuflags.h b/lib/librte_eal/common/include/arch/tile/rte_cpuflags.h
new file mode 100644
index 00000000..1849b520
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/tile/rte_cpuflags.h
@@ -0,0 +1,53 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) EZchip Semiconductor Ltd. 2015.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of EZchip Semiconductor nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_CPUFLAGS_TILE_H_
+#define _RTE_CPUFLAGS_TILE_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Enumeration of all CPU features supported
+ */
+enum rte_cpu_flag_t {
+ RTE_CPUFLAG_NUMFLAGS /**< This should always be the last! */
+};
+
+#include "generic/rte_cpuflags.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_CPUFLAGS_TILE_H_ */
diff --git a/lib/librte_eal/common/include/arch/tile/rte_cycles.h b/lib/librte_eal/common/include/arch/tile/rte_cycles.h
new file mode 100644
index 00000000..0b2200a3
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/tile/rte_cycles.h
@@ -0,0 +1,70 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) EZchip Semiconductor Ltd. 2015.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of EZchip Semiconductor nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_CYCLES_TILE_H_
+#define _RTE_CYCLES_TILE_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <arch/cycle.h>
+
+#include "generic/rte_cycles.h"
+
+/**
+ * Read the time base register.
+ *
+ * @return
+ * The time base for this lcore.
+ */
+static inline uint64_t
+rte_rdtsc(void)
+{
+ return get_cycle_count();
+}
+
+static inline uint64_t
+rte_rdtsc_precise(void)
+{
+ rte_mb();
+ return rte_rdtsc();
+}
+
+static inline uint64_t
+rte_get_tsc_cycles(void) { return rte_rdtsc(); }
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_CYCLES_TILE_H_ */
diff --git a/lib/librte_eal/common/include/arch/tile/rte_memcpy.h b/lib/librte_eal/common/include/arch/tile/rte_memcpy.h
new file mode 100644
index 00000000..9b5b37ef
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/tile/rte_memcpy.h
@@ -0,0 +1,93 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) EZchip Semiconductor Ltd. 2015.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of EZchip Semiconductor nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_MEMCPY_TILE_H_
+#define _RTE_MEMCPY_TILE_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <string.h>
+
+#include "generic/rte_memcpy.h"
+
+static inline void
+rte_mov16(uint8_t *dst, const uint8_t *src)
+{
+ memcpy(dst, src, 16);
+}
+
+static inline void
+rte_mov32(uint8_t *dst, const uint8_t *src)
+{
+ memcpy(dst, src, 32);
+}
+
+static inline void
+rte_mov48(uint8_t *dst, const uint8_t *src)
+{
+ memcpy(dst, src, 48);
+}
+
+static inline void
+rte_mov64(uint8_t *dst, const uint8_t *src)
+{
+ memcpy(dst, src, 64);
+}
+
+static inline void
+rte_mov128(uint8_t *dst, const uint8_t *src)
+{
+ memcpy(dst, src, 128);
+}
+
+static inline void
+rte_mov256(uint8_t *dst, const uint8_t *src)
+{
+ memcpy(dst, src, 256);
+}
+
+#define rte_memcpy(d, s, n) memcpy((d), (s), (n))
+
+static inline void *
+rte_memcpy_func(void *dst, const void *src, size_t n)
+{
+ return memcpy(dst, src, n);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MEMCPY_TILE_H_ */
diff --git a/lib/librte_eal/common/include/arch/tile/rte_prefetch.h b/lib/librte_eal/common/include/arch/tile/rte_prefetch.h
new file mode 100644
index 00000000..7a1bb93e
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/tile/rte_prefetch.h
@@ -0,0 +1,67 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) EZchip Semiconductor Ltd. 2015.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of EZchip Semiconductor nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_PREFETCH_TILE_H_
+#define _RTE_PREFETCH_TILE_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_prefetch.h"
+
+static inline void rte_prefetch0(const volatile void *p)
+{
+ __builtin_prefetch((const void *)(uintptr_t)p, 0, 3);
+}
+
+static inline void rte_prefetch1(const volatile void *p)
+{
+ __builtin_prefetch((const void *)(uintptr_t)p, 0, 2);
+}
+
+static inline void rte_prefetch2(const volatile void *p)
+{
+ __builtin_prefetch((const void *)(uintptr_t)p, 0, 1);
+}
+
+static inline void rte_prefetch_non_temporal(const volatile void *p)
+{
+ /* non-temporal version not available, fallback to rte_prefetch0 */
+ rte_prefetch0(p);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_PREFETCH_TILE_H_ */
diff --git a/lib/librte_eal/common/include/arch/tile/rte_rwlock.h b/lib/librte_eal/common/include/arch/tile/rte_rwlock.h
new file mode 100644
index 00000000..8f67a190
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/tile/rte_rwlock.h
@@ -0,0 +1,70 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) EZchip Semiconductor Ltd. 2015.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of EZchip Semiconductor nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_RWLOCK_TILE_H_
+#define _RTE_RWLOCK_TILE_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_rwlock.h"
+
+static inline void
+rte_rwlock_read_lock_tm(rte_rwlock_t *rwl)
+{
+ rte_rwlock_read_lock(rwl);
+}
+
+static inline void
+rte_rwlock_read_unlock_tm(rte_rwlock_t *rwl)
+{
+ rte_rwlock_read_unlock(rwl);
+}
+
+static inline void
+rte_rwlock_write_lock_tm(rte_rwlock_t *rwl)
+{
+ rte_rwlock_write_lock(rwl);
+}
+
+static inline void
+rte_rwlock_write_unlock_tm(rte_rwlock_t *rwl)
+{
+ rte_rwlock_write_unlock(rwl);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_RWLOCK_TILE_H_ */
diff --git a/lib/librte_eal/common/include/arch/tile/rte_spinlock.h b/lib/librte_eal/common/include/arch/tile/rte_spinlock.h
new file mode 100644
index 00000000..e91f99ee
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/tile/rte_spinlock.h
@@ -0,0 +1,92 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) EZchip Semiconductor Ltd. 2015.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of EZchip Semiconductor nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_SPINLOCK_TILE_H_
+#define _RTE_SPINLOCK_TILE_H_
+
+#ifndef RTE_FORCE_INTRINSICS
+# error Platform must be built with CONFIG_RTE_FORCE_INTRINSICS
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_common.h>
+#include "generic/rte_spinlock.h"
+
+static inline int rte_tm_supported(void)
+{
+ return 0;
+}
+
+static inline void
+rte_spinlock_lock_tm(rte_spinlock_t *sl)
+{
+ rte_spinlock_lock(sl); /* fall-back */
+}
+
+static inline int
+rte_spinlock_trylock_tm(rte_spinlock_t *sl)
+{
+ return rte_spinlock_trylock(sl);
+}
+
+static inline void
+rte_spinlock_unlock_tm(rte_spinlock_t *sl)
+{
+ rte_spinlock_unlock(sl);
+}
+
+static inline void
+rte_spinlock_recursive_lock_tm(rte_spinlock_recursive_t *slr)
+{
+ rte_spinlock_recursive_lock(slr); /* fall-back */
+}
+
+static inline void
+rte_spinlock_recursive_unlock_tm(rte_spinlock_recursive_t *slr)
+{
+ rte_spinlock_recursive_unlock(slr);
+}
+
+static inline int
+rte_spinlock_recursive_trylock_tm(rte_spinlock_recursive_t *slr)
+{
+ return rte_spinlock_recursive_trylock(slr);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_SPINLOCK_TILE_H_ */
diff --git a/lib/librte_eal/common/include/arch/x86/rte_atomic.h b/lib/librte_eal/common/include/arch/x86/rte_atomic.h
new file mode 100644
index 00000000..b20056b8
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/x86/rte_atomic.h
@@ -0,0 +1,222 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_ATOMIC_X86_H_
+#define _RTE_ATOMIC_X86_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <emmintrin.h>
+#include "generic/rte_atomic.h"
+
+#if RTE_MAX_LCORE == 1
+#define MPLOCKED /**< No need to insert MP lock prefix. */
+#else
+#define MPLOCKED "lock ; " /**< Insert MP lock prefix. */
+#endif
+
+#define rte_mb() _mm_mfence()
+
+#define rte_wmb() _mm_sfence()
+
+#define rte_rmb() _mm_lfence()
+
+#define rte_smp_mb() rte_mb()
+
+#define rte_smp_wmb() rte_compiler_barrier()
+
+#define rte_smp_rmb() rte_compiler_barrier()
+
+/*------------------------- 16 bit atomic operations -------------------------*/
+
+#ifndef RTE_FORCE_INTRINSICS
+static inline int
+rte_atomic16_cmpset(volatile uint16_t *dst, uint16_t exp, uint16_t src)
+{
+ uint8_t res;
+
+ asm volatile(
+ MPLOCKED
+ "cmpxchgw %[src], %[dst];"
+ "sete %[res];"
+ : [res] "=a" (res), /* output */
+ [dst] "=m" (*dst)
+ : [src] "r" (src), /* input */
+ "a" (exp),
+ "m" (*dst)
+ : "memory"); /* no-clobber list */
+ return res;
+}
+
+static inline int rte_atomic16_test_and_set(rte_atomic16_t *v)
+{
+ return rte_atomic16_cmpset((volatile uint16_t *)&v->cnt, 0, 1);
+}
+
+static inline void
+rte_atomic16_inc(rte_atomic16_t *v)
+{
+ asm volatile(
+ MPLOCKED
+ "incw %[cnt]"
+ : [cnt] "=m" (v->cnt) /* output */
+ : "m" (v->cnt) /* input */
+ );
+}
+
+static inline void
+rte_atomic16_dec(rte_atomic16_t *v)
+{
+ asm volatile(
+ MPLOCKED
+ "decw %[cnt]"
+ : [cnt] "=m" (v->cnt) /* output */
+ : "m" (v->cnt) /* input */
+ );
+}
+
+static inline int rte_atomic16_inc_and_test(rte_atomic16_t *v)
+{
+ uint8_t ret;
+
+ asm volatile(
+ MPLOCKED
+ "incw %[cnt] ; "
+ "sete %[ret]"
+ : [cnt] "+m" (v->cnt), /* output */
+ [ret] "=qm" (ret)
+ );
+ return ret != 0;
+}
+
+static inline int rte_atomic16_dec_and_test(rte_atomic16_t *v)
+{
+ uint8_t ret;
+
+ asm volatile(MPLOCKED
+ "decw %[cnt] ; "
+ "sete %[ret]"
+ : [cnt] "+m" (v->cnt), /* output */
+ [ret] "=qm" (ret)
+ );
+ return ret != 0;
+}
+
+/*------------------------- 32 bit atomic operations -------------------------*/
+
+static inline int
+rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src)
+{
+ uint8_t res;
+
+ asm volatile(
+ MPLOCKED
+ "cmpxchgl %[src], %[dst];"
+ "sete %[res];"
+ : [res] "=a" (res), /* output */
+ [dst] "=m" (*dst)
+ : [src] "r" (src), /* input */
+ "a" (exp),
+ "m" (*dst)
+ : "memory"); /* no-clobber list */
+ return res;
+}
+
+static inline int rte_atomic32_test_and_set(rte_atomic32_t *v)
+{
+ return rte_atomic32_cmpset((volatile uint32_t *)&v->cnt, 0, 1);
+}
+
+static inline void
+rte_atomic32_inc(rte_atomic32_t *v)
+{
+ asm volatile(
+ MPLOCKED
+ "incl %[cnt]"
+ : [cnt] "=m" (v->cnt) /* output */
+ : "m" (v->cnt) /* input */
+ );
+}
+
+static inline void
+rte_atomic32_dec(rte_atomic32_t *v)
+{
+ asm volatile(
+ MPLOCKED
+ "decl %[cnt]"
+ : [cnt] "=m" (v->cnt) /* output */
+ : "m" (v->cnt) /* input */
+ );
+}
+
+static inline int rte_atomic32_inc_and_test(rte_atomic32_t *v)
+{
+ uint8_t ret;
+
+ asm volatile(
+ MPLOCKED
+ "incl %[cnt] ; "
+ "sete %[ret]"
+ : [cnt] "+m" (v->cnt), /* output */
+ [ret] "=qm" (ret)
+ );
+ return ret != 0;
+}
+
+static inline int rte_atomic32_dec_and_test(rte_atomic32_t *v)
+{
+ uint8_t ret;
+
+ asm volatile(MPLOCKED
+ "decl %[cnt] ; "
+ "sete %[ret]"
+ : [cnt] "+m" (v->cnt), /* output */
+ [ret] "=qm" (ret)
+ );
+ return ret != 0;
+}
+#endif
+
+#ifdef RTE_ARCH_I686
+#include "rte_atomic_32.h"
+#else
+#include "rte_atomic_64.h"
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_ATOMIC_X86_H_ */
diff --git a/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h b/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h
new file mode 100644
index 00000000..400d8a96
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h
@@ -0,0 +1,222 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Inspired from FreeBSD src/sys/i386/include/atomic.h
+ * Copyright (c) 1998 Doug Rabson
+ * All rights reserved.
+ */
+
+#ifndef _RTE_ATOMIC_I686_H_
+#define _RTE_ATOMIC_I686_H_
+
+/*------------------------- 64 bit atomic operations -------------------------*/
+
+#ifndef RTE_FORCE_INTRINSICS
+static inline int
+rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src)
+{
+ uint8_t res;
+ union {
+ struct {
+ uint32_t l32;
+ uint32_t h32;
+ };
+ uint64_t u64;
+ } _exp, _src;
+
+ _exp.u64 = exp;
+ _src.u64 = src;
+
+#ifndef __PIC__
+ asm volatile (
+ MPLOCKED
+ "cmpxchg8b (%[dst]);"
+ "setz %[res];"
+ : [res] "=a" (res) /* result in eax */
+ : [dst] "S" (dst), /* esi */
+ "b" (_src.l32), /* ebx */
+ "c" (_src.h32), /* ecx */
+ "a" (_exp.l32), /* eax */
+ "d" (_exp.h32) /* edx */
+ : "memory" ); /* no-clobber list */
+#else
+ asm volatile (
+ "mov %%ebx, %%edi\n"
+ MPLOCKED
+ "cmpxchg8b (%[dst]);"
+ "setz %[res];"
+ "xchgl %%ebx, %%edi;\n"
+ : [res] "=a" (res) /* result in eax */
+ : [dst] "S" (dst), /* esi */
+ "D" (_src.l32), /* ebx */
+ "c" (_src.h32), /* ecx */
+ "a" (_exp.l32), /* eax */
+ "d" (_exp.h32) /* edx */
+ : "memory" ); /* no-clobber list */
+#endif
+
+ return res;
+}
+
+static inline void
+rte_atomic64_init(rte_atomic64_t *v)
+{
+ int success = 0;
+ uint64_t tmp;
+
+ while (success == 0) {
+ tmp = v->cnt;
+ success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt,
+ tmp, 0);
+ }
+}
+
+static inline int64_t
+rte_atomic64_read(rte_atomic64_t *v)
+{
+ int success = 0;
+ uint64_t tmp;
+
+ while (success == 0) {
+ tmp = v->cnt;
+ /* replace the value by itself */
+ success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt,
+ tmp, tmp);
+ }
+ return tmp;
+}
+
+static inline void
+rte_atomic64_set(rte_atomic64_t *v, int64_t new_value)
+{
+ int success = 0;
+ uint64_t tmp;
+
+ while (success == 0) {
+ tmp = v->cnt;
+ success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt,
+ tmp, new_value);
+ }
+}
+
+static inline void
+rte_atomic64_add(rte_atomic64_t *v, int64_t inc)
+{
+ int success = 0;
+ uint64_t tmp;
+
+ while (success == 0) {
+ tmp = v->cnt;
+ success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt,
+ tmp, tmp + inc);
+ }
+}
+
+static inline void
+rte_atomic64_sub(rte_atomic64_t *v, int64_t dec)
+{
+ int success = 0;
+ uint64_t tmp;
+
+ while (success == 0) {
+ tmp = v->cnt;
+ success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt,
+ tmp, tmp - dec);
+ }
+}
+
+static inline void
+rte_atomic64_inc(rte_atomic64_t *v)
+{
+ rte_atomic64_add(v, 1);
+}
+
+static inline void
+rte_atomic64_dec(rte_atomic64_t *v)
+{
+ rte_atomic64_sub(v, 1);
+}
+
+static inline int64_t
+rte_atomic64_add_return(rte_atomic64_t *v, int64_t inc)
+{
+ int success = 0;
+ uint64_t tmp;
+
+ while (success == 0) {
+ tmp = v->cnt;
+ success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt,
+ tmp, tmp + inc);
+ }
+
+ return tmp + inc;
+}
+
+static inline int64_t
+rte_atomic64_sub_return(rte_atomic64_t *v, int64_t dec)
+{
+ int success = 0;
+ uint64_t tmp;
+
+ while (success == 0) {
+ tmp = v->cnt;
+ success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt,
+ tmp, tmp - dec);
+ }
+
+ return tmp - dec;
+}
+
+static inline int rte_atomic64_inc_and_test(rte_atomic64_t *v)
+{
+ return rte_atomic64_add_return(v, 1) == 0;
+}
+
+static inline int rte_atomic64_dec_and_test(rte_atomic64_t *v)
+{
+ return rte_atomic64_sub_return(v, 1) == 0;
+}
+
+static inline int rte_atomic64_test_and_set(rte_atomic64_t *v)
+{
+ return rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, 0, 1);
+}
+
+static inline void rte_atomic64_clear(rte_atomic64_t *v)
+{
+ rte_atomic64_set(v, 0);
+}
+#endif
+
+#endif /* _RTE_ATOMIC_I686_H_ */
diff --git a/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h b/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h
new file mode 100644
index 00000000..4de66000
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h
@@ -0,0 +1,191 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Inspired from FreeBSD src/sys/amd64/include/atomic.h
+ * Copyright (c) 1998 Doug Rabson
+ * All rights reserved.
+ */
+
+#ifndef _RTE_ATOMIC_X86_64_H_
+#define _RTE_ATOMIC_X86_64_H_
+
+/*------------------------- 64 bit atomic operations -------------------------*/
+
+#ifndef RTE_FORCE_INTRINSICS
+static inline int
+rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src)
+{
+ uint8_t res;
+
+
+ asm volatile(
+ MPLOCKED
+ "cmpxchgq %[src], %[dst];"
+ "sete %[res];"
+ : [res] "=a" (res), /* output */
+ [dst] "=m" (*dst)
+ : [src] "r" (src), /* input */
+ "a" (exp),
+ "m" (*dst)
+ : "memory"); /* no-clobber list */
+
+ return res;
+}
+
+static inline void
+rte_atomic64_init(rte_atomic64_t *v)
+{
+ v->cnt = 0;
+}
+
+static inline int64_t
+rte_atomic64_read(rte_atomic64_t *v)
+{
+ return v->cnt;
+}
+
+static inline void
+rte_atomic64_set(rte_atomic64_t *v, int64_t new_value)
+{
+ v->cnt = new_value;
+}
+
+static inline void
+rte_atomic64_add(rte_atomic64_t *v, int64_t inc)
+{
+ asm volatile(
+ MPLOCKED
+ "addq %[inc], %[cnt]"
+ : [cnt] "=m" (v->cnt) /* output */
+ : [inc] "ir" (inc), /* input */
+ "m" (v->cnt)
+ );
+}
+
+static inline void
+rte_atomic64_sub(rte_atomic64_t *v, int64_t dec)
+{
+ asm volatile(
+ MPLOCKED
+ "subq %[dec], %[cnt]"
+ : [cnt] "=m" (v->cnt) /* output */
+ : [dec] "ir" (dec), /* input */
+ "m" (v->cnt)
+ );
+}
+
+static inline void
+rte_atomic64_inc(rte_atomic64_t *v)
+{
+ asm volatile(
+ MPLOCKED
+ "incq %[cnt]"
+ : [cnt] "=m" (v->cnt) /* output */
+ : "m" (v->cnt) /* input */
+ );
+}
+
+static inline void
+rte_atomic64_dec(rte_atomic64_t *v)
+{
+ asm volatile(
+ MPLOCKED
+ "decq %[cnt]"
+ : [cnt] "=m" (v->cnt) /* output */
+ : "m" (v->cnt) /* input */
+ );
+}
+
+static inline int64_t
+rte_atomic64_add_return(rte_atomic64_t *v, int64_t inc)
+{
+ int64_t prev = inc;
+
+ asm volatile(
+ MPLOCKED
+ "xaddq %[prev], %[cnt]"
+ : [prev] "+r" (prev), /* output */
+ [cnt] "=m" (v->cnt)
+ : "m" (v->cnt) /* input */
+ );
+ return prev + inc;
+}
+
+static inline int64_t
+rte_atomic64_sub_return(rte_atomic64_t *v, int64_t dec)
+{
+ return rte_atomic64_add_return(v, -dec);
+}
+
+static inline int rte_atomic64_inc_and_test(rte_atomic64_t *v)
+{
+ uint8_t ret;
+
+ asm volatile(
+ MPLOCKED
+ "incq %[cnt] ; "
+ "sete %[ret]"
+ : [cnt] "+m" (v->cnt), /* output */
+ [ret] "=qm" (ret)
+ );
+
+ return ret != 0;
+}
+
+static inline int rte_atomic64_dec_and_test(rte_atomic64_t *v)
+{
+ uint8_t ret;
+
+ asm volatile(
+ MPLOCKED
+ "decq %[cnt] ; "
+ "sete %[ret]"
+ : [cnt] "+m" (v->cnt), /* output */
+ [ret] "=qm" (ret)
+ );
+ return ret != 0;
+}
+
+static inline int rte_atomic64_test_and_set(rte_atomic64_t *v)
+{
+ return rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, 0, 1);
+}
+
+static inline void rte_atomic64_clear(rte_atomic64_t *v)
+{
+ v->cnt = 0;
+}
+#endif
+
+#endif /* _RTE_ATOMIC_X86_64_H_ */
diff --git a/lib/librte_eal/common/include/arch/x86/rte_byteorder.h b/lib/librte_eal/common/include/arch/x86/rte_byteorder.h
new file mode 100644
index 00000000..ffdb6ef5
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/x86/rte_byteorder.h
@@ -0,0 +1,125 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_BYTEORDER_X86_H_
+#define _RTE_BYTEORDER_X86_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_byteorder.h"
+
+#ifndef RTE_BYTE_ORDER
+#define RTE_BYTE_ORDER RTE_LITTLE_ENDIAN
+#endif
+
+/*
+ * An architecture-optimized byte swap for a 16-bit value.
+ *
+ * Do not use this function directly. The preferred function is rte_bswap16().
+ */
+static inline uint16_t rte_arch_bswap16(uint16_t _x)
+{
+ register uint16_t x = _x;
+ asm volatile ("xchgb %b[x1],%h[x2]"
+ : [x1] "=Q" (x)
+ : [x2] "0" (x)
+ );
+ return x;
+}
+
+/*
+ * An architecture-optimized byte swap for a 32-bit value.
+ *
+ * Do not use this function directly. The preferred function is rte_bswap32().
+ */
+static inline uint32_t rte_arch_bswap32(uint32_t _x)
+{
+ register uint32_t x = _x;
+ asm volatile ("bswap %[x]"
+ : [x] "+r" (x)
+ );
+ return x;
+}
+
+#ifndef RTE_FORCE_INTRINSICS
+#define rte_bswap16(x) ((uint16_t)(__builtin_constant_p(x) ? \
+ rte_constant_bswap16(x) : \
+ rte_arch_bswap16(x)))
+
+#define rte_bswap32(x) ((uint32_t)(__builtin_constant_p(x) ? \
+ rte_constant_bswap32(x) : \
+ rte_arch_bswap32(x)))
+
+#define rte_bswap64(x) ((uint64_t)(__builtin_constant_p(x) ? \
+ rte_constant_bswap64(x) : \
+ rte_arch_bswap64(x)))
+#else
+/*
+ * __builtin_bswap16 is only available gcc 4.8 and upwards
+ */
+#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 8)
+#define rte_bswap16(x) ((uint16_t)(__builtin_constant_p(x) ? \
+ rte_constant_bswap16(x) : \
+ rte_arch_bswap16(x)))
+#endif
+#endif
+
+#define rte_cpu_to_le_16(x) (x)
+#define rte_cpu_to_le_32(x) (x)
+#define rte_cpu_to_le_64(x) (x)
+
+#define rte_cpu_to_be_16(x) rte_bswap16(x)
+#define rte_cpu_to_be_32(x) rte_bswap32(x)
+#define rte_cpu_to_be_64(x) rte_bswap64(x)
+
+#define rte_le_to_cpu_16(x) (x)
+#define rte_le_to_cpu_32(x) (x)
+#define rte_le_to_cpu_64(x) (x)
+
+#define rte_be_to_cpu_16(x) rte_bswap16(x)
+#define rte_be_to_cpu_32(x) rte_bswap32(x)
+#define rte_be_to_cpu_64(x) rte_bswap64(x)
+
+#ifdef RTE_ARCH_I686
+#include "rte_byteorder_32.h"
+#else
+#include "rte_byteorder_64.h"
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_BYTEORDER_X86_H_ */
diff --git a/lib/librte_eal/common/include/arch/x86/rte_byteorder_32.h b/lib/librte_eal/common/include/arch/x86/rte_byteorder_32.h
new file mode 100644
index 00000000..51c306f8
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/x86/rte_byteorder_32.h
@@ -0,0 +1,51 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_BYTEORDER_I686_H_
+#define _RTE_BYTEORDER_I686_H_
+
+/*
+ * An architecture-optimized byte swap for a 64-bit value.
+ *
+ * Do not use this function directly. The preferred function is rte_bswap64().
+ */
+/* Compat./Leg. mode */
+static inline uint64_t rte_arch_bswap64(uint64_t x)
+{
+ uint64_t ret = 0;
+ ret |= ((uint64_t)rte_arch_bswap32(x & 0xffffffffUL) << 32);
+ ret |= ((uint64_t)rte_arch_bswap32((x >> 32) & 0xffffffffUL));
+ return ret;
+}
+
+#endif /* _RTE_BYTEORDER_I686_H_ */
diff --git a/lib/librte_eal/common/include/arch/x86/rte_byteorder_64.h b/lib/librte_eal/common/include/arch/x86/rte_byteorder_64.h
new file mode 100644
index 00000000..dda572bd
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/x86/rte_byteorder_64.h
@@ -0,0 +1,52 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_BYTEORDER_X86_64_H_
+#define _RTE_BYTEORDER_X86_64_H_
+
+/*
+ * An architecture-optimized byte swap for a 64-bit value.
+ *
+ * Do not use this function directly. The preferred function is rte_bswap64().
+ */
+/* 64-bit mode */
+static inline uint64_t rte_arch_bswap64(uint64_t _x)
+{
+ register uint64_t x = _x;
+ asm volatile ("bswap %[x]"
+ : [x] "+r" (x)
+ );
+ return x;
+}
+
+#endif /* _RTE_BYTEORDER_X86_64_H_ */
diff --git a/lib/librte_eal/common/include/arch/x86/rte_cpuflags.h b/lib/librte_eal/common/include/arch/x86/rte_cpuflags.h
new file mode 100644
index 00000000..26204fab
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/x86/rte_cpuflags.h
@@ -0,0 +1,153 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_CPUFLAGS_X86_64_H_
+#define _RTE_CPUFLAGS_X86_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+enum rte_cpu_flag_t {
+ /* (EAX 01h) ECX features*/
+ RTE_CPUFLAG_SSE3 = 0, /**< SSE3 */
+ RTE_CPUFLAG_PCLMULQDQ, /**< PCLMULQDQ */
+ RTE_CPUFLAG_DTES64, /**< DTES64 */
+ RTE_CPUFLAG_MONITOR, /**< MONITOR */
+ RTE_CPUFLAG_DS_CPL, /**< DS_CPL */
+ RTE_CPUFLAG_VMX, /**< VMX */
+ RTE_CPUFLAG_SMX, /**< SMX */
+ RTE_CPUFLAG_EIST, /**< EIST */
+ RTE_CPUFLAG_TM2, /**< TM2 */
+ RTE_CPUFLAG_SSSE3, /**< SSSE3 */
+ RTE_CPUFLAG_CNXT_ID, /**< CNXT_ID */
+ RTE_CPUFLAG_FMA, /**< FMA */
+ RTE_CPUFLAG_CMPXCHG16B, /**< CMPXCHG16B */
+ RTE_CPUFLAG_XTPR, /**< XTPR */
+ RTE_CPUFLAG_PDCM, /**< PDCM */
+ RTE_CPUFLAG_PCID, /**< PCID */
+ RTE_CPUFLAG_DCA, /**< DCA */
+ RTE_CPUFLAG_SSE4_1, /**< SSE4_1 */
+ RTE_CPUFLAG_SSE4_2, /**< SSE4_2 */
+ RTE_CPUFLAG_X2APIC, /**< X2APIC */
+ RTE_CPUFLAG_MOVBE, /**< MOVBE */
+ RTE_CPUFLAG_POPCNT, /**< POPCNT */
+ RTE_CPUFLAG_TSC_DEADLINE, /**< TSC_DEADLINE */
+ RTE_CPUFLAG_AES, /**< AES */
+ RTE_CPUFLAG_XSAVE, /**< XSAVE */
+ RTE_CPUFLAG_OSXSAVE, /**< OSXSAVE */
+ RTE_CPUFLAG_AVX, /**< AVX */
+ RTE_CPUFLAG_F16C, /**< F16C */
+ RTE_CPUFLAG_RDRAND, /**< RDRAND */
+
+ /* (EAX 01h) EDX features */
+ RTE_CPUFLAG_FPU, /**< FPU */
+ RTE_CPUFLAG_VME, /**< VME */
+ RTE_CPUFLAG_DE, /**< DE */
+ RTE_CPUFLAG_PSE, /**< PSE */
+ RTE_CPUFLAG_TSC, /**< TSC */
+ RTE_CPUFLAG_MSR, /**< MSR */
+ RTE_CPUFLAG_PAE, /**< PAE */
+ RTE_CPUFLAG_MCE, /**< MCE */
+ RTE_CPUFLAG_CX8, /**< CX8 */
+ RTE_CPUFLAG_APIC, /**< APIC */
+ RTE_CPUFLAG_SEP, /**< SEP */
+ RTE_CPUFLAG_MTRR, /**< MTRR */
+ RTE_CPUFLAG_PGE, /**< PGE */
+ RTE_CPUFLAG_MCA, /**< MCA */
+ RTE_CPUFLAG_CMOV, /**< CMOV */
+ RTE_CPUFLAG_PAT, /**< PAT */
+ RTE_CPUFLAG_PSE36, /**< PSE36 */
+ RTE_CPUFLAG_PSN, /**< PSN */
+ RTE_CPUFLAG_CLFSH, /**< CLFSH */
+ RTE_CPUFLAG_DS, /**< DS */
+ RTE_CPUFLAG_ACPI, /**< ACPI */
+ RTE_CPUFLAG_MMX, /**< MMX */
+ RTE_CPUFLAG_FXSR, /**< FXSR */
+ RTE_CPUFLAG_SSE, /**< SSE */
+ RTE_CPUFLAG_SSE2, /**< SSE2 */
+ RTE_CPUFLAG_SS, /**< SS */
+ RTE_CPUFLAG_HTT, /**< HTT */
+ RTE_CPUFLAG_TM, /**< TM */
+ RTE_CPUFLAG_PBE, /**< PBE */
+
+ /* (EAX 06h) EAX features */
+ RTE_CPUFLAG_DIGTEMP, /**< DIGTEMP */
+ RTE_CPUFLAG_TRBOBST, /**< TRBOBST */
+ RTE_CPUFLAG_ARAT, /**< ARAT */
+ RTE_CPUFLAG_PLN, /**< PLN */
+ RTE_CPUFLAG_ECMD, /**< ECMD */
+ RTE_CPUFLAG_PTM, /**< PTM */
+
+ /* (EAX 06h) ECX features */
+ RTE_CPUFLAG_MPERF_APERF_MSR, /**< MPERF_APERF_MSR */
+ RTE_CPUFLAG_ACNT2, /**< ACNT2 */
+ RTE_CPUFLAG_ENERGY_EFF, /**< ENERGY_EFF */
+
+ /* (EAX 07h, ECX 0h) EBX features */
+ RTE_CPUFLAG_FSGSBASE, /**< FSGSBASE */
+ RTE_CPUFLAG_BMI1, /**< BMI1 */
+ RTE_CPUFLAG_HLE, /**< Hardware Lock elision */
+ RTE_CPUFLAG_AVX2, /**< AVX2 */
+ RTE_CPUFLAG_SMEP, /**< SMEP */
+ RTE_CPUFLAG_BMI2, /**< BMI2 */
+ RTE_CPUFLAG_ERMS, /**< ERMS */
+ RTE_CPUFLAG_INVPCID, /**< INVPCID */
+ RTE_CPUFLAG_RTM, /**< Transactional memory */
+ RTE_CPUFLAG_AVX512F, /**< AVX512F */
+
+ /* (EAX 80000001h) ECX features */
+ RTE_CPUFLAG_LAHF_SAHF, /**< LAHF_SAHF */
+ RTE_CPUFLAG_LZCNT, /**< LZCNT */
+
+ /* (EAX 80000001h) EDX features */
+ RTE_CPUFLAG_SYSCALL, /**< SYSCALL */
+ RTE_CPUFLAG_XD, /**< XD */
+ RTE_CPUFLAG_1GB_PG, /**< 1GB_PG */
+ RTE_CPUFLAG_RDTSCP, /**< RDTSCP */
+ RTE_CPUFLAG_EM64T, /**< EM64T */
+
+ /* (EAX 80000007h) EDX features */
+ RTE_CPUFLAG_INVTSC, /**< INVTSC */
+
+ /* The last item */
+ RTE_CPUFLAG_NUMFLAGS, /**< This should always be the last! */
+};
+
+#include "generic/rte_cpuflags.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_CPUFLAGS_X86_64_H_ */
diff --git a/lib/librte_eal/common/include/arch/x86/rte_cycles.h b/lib/librte_eal/common/include/arch/x86/rte_cycles.h
new file mode 100644
index 00000000..6e3c7d89
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/x86/rte_cycles.h
@@ -0,0 +1,121 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+/* BSD LICENSE
+ *
+ * Copyright(c) 2013 6WIND.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_CYCLES_X86_64_H_
+#define _RTE_CYCLES_X86_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_cycles.h"
+
+#ifdef RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT
+/* Global switch to use VMWARE mapping of TSC instead of RDTSC */
+extern int rte_cycles_vmware_tsc_map;
+#include <rte_branch_prediction.h>
+#endif
+
+static inline uint64_t
+rte_rdtsc(void)
+{
+ union {
+ uint64_t tsc_64;
+ struct {
+ uint32_t lo_32;
+ uint32_t hi_32;
+ };
+ } tsc;
+
+#ifdef RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT
+ if (unlikely(rte_cycles_vmware_tsc_map)) {
+ /* ecx = 0x10000 corresponds to the physical TSC for VMware */
+ asm volatile("rdpmc" :
+ "=a" (tsc.lo_32),
+ "=d" (tsc.hi_32) :
+ "c"(0x10000));
+ return tsc.tsc_64;
+ }
+#endif
+
+ asm volatile("rdtsc" :
+ "=a" (tsc.lo_32),
+ "=d" (tsc.hi_32));
+ return tsc.tsc_64;
+}
+
+static inline uint64_t
+rte_rdtsc_precise(void)
+{
+ rte_mb();
+ return rte_rdtsc();
+}
+
+static inline uint64_t
+rte_get_tsc_cycles(void) { return rte_rdtsc(); }
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_CYCLES_X86_64_H_ */
diff --git a/lib/librte_eal/common/include/arch/x86/rte_memcpy.h b/lib/librte_eal/common/include/arch/x86/rte_memcpy.h
new file mode 100644
index 00000000..f463ab30
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/x86/rte_memcpy.h
@@ -0,0 +1,884 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_MEMCPY_X86_64_H_
+#define _RTE_MEMCPY_X86_64_H_
+
+/**
+ * @file
+ *
+ * Functions for SSE/AVX/AVX2/AVX512 implementation of memcpy().
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <rte_vect.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Copy bytes from one location to another. The locations must not overlap.
+ *
+ * @note This is implemented as a macro, so it's address should not be taken
+ * and care is needed as parameter expressions may be evaluated multiple times.
+ *
+ * @param dst
+ * Pointer to the destination of the data.
+ * @param src
+ * Pointer to the source data.
+ * @param n
+ * Number of bytes to copy.
+ * @return
+ * Pointer to the destination data.
+ */
+static inline void *
+rte_memcpy(void *dst, const void *src, size_t n) __attribute__((always_inline));
+
+#ifdef RTE_MACHINE_CPUFLAG_AVX512F
+
+/**
+ * AVX512 implementation below
+ */
+
+/**
+ * Copy 16 bytes from one location to another,
+ * locations should not overlap.
+ */
+static inline void
+rte_mov16(uint8_t *dst, const uint8_t *src)
+{
+ __m128i xmm0;
+
+ xmm0 = _mm_loadu_si128((const __m128i *)src);
+ _mm_storeu_si128((__m128i *)dst, xmm0);
+}
+
+/**
+ * Copy 32 bytes from one location to another,
+ * locations should not overlap.
+ */
+static inline void
+rte_mov32(uint8_t *dst, const uint8_t *src)
+{
+ __m256i ymm0;
+
+ ymm0 = _mm256_loadu_si256((const __m256i *)src);
+ _mm256_storeu_si256((__m256i *)dst, ymm0);
+}
+
+/**
+ * Copy 64 bytes from one location to another,
+ * locations should not overlap.
+ */
+static inline void
+rte_mov64(uint8_t *dst, const uint8_t *src)
+{
+ __m512i zmm0;
+
+ zmm0 = _mm512_loadu_si512((const void *)src);
+ _mm512_storeu_si512((void *)dst, zmm0);
+}
+
+/**
+ * Copy 128 bytes from one location to another,
+ * locations should not overlap.
+ */
+static inline void
+rte_mov128(uint8_t *dst, const uint8_t *src)
+{
+ rte_mov64(dst + 0 * 64, src + 0 * 64);
+ rte_mov64(dst + 1 * 64, src + 1 * 64);
+}
+
+/**
+ * Copy 256 bytes from one location to another,
+ * locations should not overlap.
+ */
+static inline void
+rte_mov256(uint8_t *dst, const uint8_t *src)
+{
+ rte_mov64(dst + 0 * 64, src + 0 * 64);
+ rte_mov64(dst + 1 * 64, src + 1 * 64);
+ rte_mov64(dst + 2 * 64, src + 2 * 64);
+ rte_mov64(dst + 3 * 64, src + 3 * 64);
+}
+
+/**
+ * Copy 128-byte blocks from one location to another,
+ * locations should not overlap.
+ */
+static inline void
+rte_mov128blocks(uint8_t *dst, const uint8_t *src, size_t n)
+{
+ __m512i zmm0, zmm1;
+
+ while (n >= 128) {
+ zmm0 = _mm512_loadu_si512((const void *)(src + 0 * 64));
+ n -= 128;
+ zmm1 = _mm512_loadu_si512((const void *)(src + 1 * 64));
+ src = src + 128;
+ _mm512_storeu_si512((void *)(dst + 0 * 64), zmm0);
+ _mm512_storeu_si512((void *)(dst + 1 * 64), zmm1);
+ dst = dst + 128;
+ }
+}
+
+/**
+ * Copy 512-byte blocks from one location to another,
+ * locations should not overlap.
+ */
+static inline void
+rte_mov512blocks(uint8_t *dst, const uint8_t *src, size_t n)
+{
+ __m512i zmm0, zmm1, zmm2, zmm3, zmm4, zmm5, zmm6, zmm7;
+
+ while (n >= 512) {
+ zmm0 = _mm512_loadu_si512((const void *)(src + 0 * 64));
+ n -= 512;
+ zmm1 = _mm512_loadu_si512((const void *)(src + 1 * 64));
+ zmm2 = _mm512_loadu_si512((const void *)(src + 2 * 64));
+ zmm3 = _mm512_loadu_si512((const void *)(src + 3 * 64));
+ zmm4 = _mm512_loadu_si512((const void *)(src + 4 * 64));
+ zmm5 = _mm512_loadu_si512((const void *)(src + 5 * 64));
+ zmm6 = _mm512_loadu_si512((const void *)(src + 6 * 64));
+ zmm7 = _mm512_loadu_si512((const void *)(src + 7 * 64));
+ src = src + 512;
+ _mm512_storeu_si512((void *)(dst + 0 * 64), zmm0);
+ _mm512_storeu_si512((void *)(dst + 1 * 64), zmm1);
+ _mm512_storeu_si512((void *)(dst + 2 * 64), zmm2);
+ _mm512_storeu_si512((void *)(dst + 3 * 64), zmm3);
+ _mm512_storeu_si512((void *)(dst + 4 * 64), zmm4);
+ _mm512_storeu_si512((void *)(dst + 5 * 64), zmm5);
+ _mm512_storeu_si512((void *)(dst + 6 * 64), zmm6);
+ _mm512_storeu_si512((void *)(dst + 7 * 64), zmm7);
+ dst = dst + 512;
+ }
+}
+
+static inline void *
+rte_memcpy(void *dst, const void *src, size_t n)
+{
+ uintptr_t dstu = (uintptr_t)dst;
+ uintptr_t srcu = (uintptr_t)src;
+ void *ret = dst;
+ size_t dstofss;
+ size_t bits;
+
+ /**
+ * Copy less than 16 bytes
+ */
+ if (n < 16) {
+ if (n & 0x01) {
+ *(uint8_t *)dstu = *(const uint8_t *)srcu;
+ srcu = (uintptr_t)((const uint8_t *)srcu + 1);
+ dstu = (uintptr_t)((uint8_t *)dstu + 1);
+ }
+ if (n & 0x02) {
+ *(uint16_t *)dstu = *(const uint16_t *)srcu;
+ srcu = (uintptr_t)((const uint16_t *)srcu + 1);
+ dstu = (uintptr_t)((uint16_t *)dstu + 1);
+ }
+ if (n & 0x04) {
+ *(uint32_t *)dstu = *(const uint32_t *)srcu;
+ srcu = (uintptr_t)((const uint32_t *)srcu + 1);
+ dstu = (uintptr_t)((uint32_t *)dstu + 1);
+ }
+ if (n & 0x08)
+ *(uint64_t *)dstu = *(const uint64_t *)srcu;
+ return ret;
+ }
+
+ /**
+ * Fast way when copy size doesn't exceed 512 bytes
+ */
+ if (n <= 32) {
+ rte_mov16((uint8_t *)dst, (const uint8_t *)src);
+ rte_mov16((uint8_t *)dst - 16 + n,
+ (const uint8_t *)src - 16 + n);
+ return ret;
+ }
+ if (n <= 64) {
+ rte_mov32((uint8_t *)dst, (const uint8_t *)src);
+ rte_mov32((uint8_t *)dst - 32 + n,
+ (const uint8_t *)src - 32 + n);
+ return ret;
+ }
+ if (n <= 512) {
+ if (n >= 256) {
+ n -= 256;
+ rte_mov256((uint8_t *)dst, (const uint8_t *)src);
+ src = (const uint8_t *)src + 256;
+ dst = (uint8_t *)dst + 256;
+ }
+ if (n >= 128) {
+ n -= 128;
+ rte_mov128((uint8_t *)dst, (const uint8_t *)src);
+ src = (const uint8_t *)src + 128;
+ dst = (uint8_t *)dst + 128;
+ }
+COPY_BLOCK_128_BACK63:
+ if (n > 64) {
+ rte_mov64((uint8_t *)dst, (const uint8_t *)src);
+ rte_mov64((uint8_t *)dst - 64 + n,
+ (const uint8_t *)src - 64 + n);
+ return ret;
+ }
+ if (n > 0)
+ rte_mov64((uint8_t *)dst - 64 + n,
+ (const uint8_t *)src - 64 + n);
+ return ret;
+ }
+
+ /**
+ * Make store aligned when copy size exceeds 512 bytes
+ */
+ dstofss = ((uintptr_t)dst & 0x3F);
+ if (dstofss > 0) {
+ dstofss = 64 - dstofss;
+ n -= dstofss;
+ rte_mov64((uint8_t *)dst, (const uint8_t *)src);
+ src = (const uint8_t *)src + dstofss;
+ dst = (uint8_t *)dst + dstofss;
+ }
+
+ /**
+ * Copy 512-byte blocks.
+ * Use copy block function for better instruction order control,
+ * which is important when load is unaligned.
+ */
+ rte_mov512blocks((uint8_t *)dst, (const uint8_t *)src, n);
+ bits = n;
+ n = n & 511;
+ bits -= n;
+ src = (const uint8_t *)src + bits;
+ dst = (uint8_t *)dst + bits;
+
+ /**
+ * Copy 128-byte blocks.
+ * Use copy block function for better instruction order control,
+ * which is important when load is unaligned.
+ */
+ if (n >= 128) {
+ rte_mov128blocks((uint8_t *)dst, (const uint8_t *)src, n);
+ bits = n;
+ n = n & 127;
+ bits -= n;
+ src = (const uint8_t *)src + bits;
+ dst = (uint8_t *)dst + bits;
+ }
+
+ /**
+ * Copy whatever left
+ */
+ goto COPY_BLOCK_128_BACK63;
+}
+
+#elif defined RTE_MACHINE_CPUFLAG_AVX2
+
+/**
+ * AVX2 implementation below
+ */
+
+/**
+ * Copy 16 bytes from one location to another,
+ * locations should not overlap.
+ */
+static inline void
+rte_mov16(uint8_t *dst, const uint8_t *src)
+{
+ __m128i xmm0;
+
+ xmm0 = _mm_loadu_si128((const __m128i *)src);
+ _mm_storeu_si128((__m128i *)dst, xmm0);
+}
+
+/**
+ * Copy 32 bytes from one location to another,
+ * locations should not overlap.
+ */
+static inline void
+rte_mov32(uint8_t *dst, const uint8_t *src)
+{
+ __m256i ymm0;
+
+ ymm0 = _mm256_loadu_si256((const __m256i *)src);
+ _mm256_storeu_si256((__m256i *)dst, ymm0);
+}
+
+/**
+ * Copy 64 bytes from one location to another,
+ * locations should not overlap.
+ */
+static inline void
+rte_mov64(uint8_t *dst, const uint8_t *src)
+{
+ rte_mov32((uint8_t *)dst + 0 * 32, (const uint8_t *)src + 0 * 32);
+ rte_mov32((uint8_t *)dst + 1 * 32, (const uint8_t *)src + 1 * 32);
+}
+
+/**
+ * Copy 128 bytes from one location to another,
+ * locations should not overlap.
+ */
+static inline void
+rte_mov128(uint8_t *dst, const uint8_t *src)
+{
+ rte_mov32((uint8_t *)dst + 0 * 32, (const uint8_t *)src + 0 * 32);
+ rte_mov32((uint8_t *)dst + 1 * 32, (const uint8_t *)src + 1 * 32);
+ rte_mov32((uint8_t *)dst + 2 * 32, (const uint8_t *)src + 2 * 32);
+ rte_mov32((uint8_t *)dst + 3 * 32, (const uint8_t *)src + 3 * 32);
+}
+
+/**
+ * Copy 256 bytes from one location to another,
+ * locations should not overlap.
+ */
+static inline void
+rte_mov256(uint8_t *dst, const uint8_t *src)
+{
+ rte_mov32((uint8_t *)dst + 0 * 32, (const uint8_t *)src + 0 * 32);
+ rte_mov32((uint8_t *)dst + 1 * 32, (const uint8_t *)src + 1 * 32);
+ rte_mov32((uint8_t *)dst + 2 * 32, (const uint8_t *)src + 2 * 32);
+ rte_mov32((uint8_t *)dst + 3 * 32, (const uint8_t *)src + 3 * 32);
+ rte_mov32((uint8_t *)dst + 4 * 32, (const uint8_t *)src + 4 * 32);
+ rte_mov32((uint8_t *)dst + 5 * 32, (const uint8_t *)src + 5 * 32);
+ rte_mov32((uint8_t *)dst + 6 * 32, (const uint8_t *)src + 6 * 32);
+ rte_mov32((uint8_t *)dst + 7 * 32, (const uint8_t *)src + 7 * 32);
+}
+
+/**
+ * Copy 64-byte blocks from one location to another,
+ * locations should not overlap.
+ */
+static inline void
+rte_mov64blocks(uint8_t *dst, const uint8_t *src, size_t n)
+{
+ __m256i ymm0, ymm1;
+
+ while (n >= 64) {
+ ymm0 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 0 * 32));
+ n -= 64;
+ ymm1 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 1 * 32));
+ src = (const uint8_t *)src + 64;
+ _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 0 * 32), ymm0);
+ _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 1 * 32), ymm1);
+ dst = (uint8_t *)dst + 64;
+ }
+}
+
+/**
+ * Copy 256-byte blocks from one location to another,
+ * locations should not overlap.
+ */
+static inline void
+rte_mov256blocks(uint8_t *dst, const uint8_t *src, size_t n)
+{
+ __m256i ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7;
+
+ while (n >= 256) {
+ ymm0 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 0 * 32));
+ n -= 256;
+ ymm1 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 1 * 32));
+ ymm2 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 2 * 32));
+ ymm3 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 3 * 32));
+ ymm4 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 4 * 32));
+ ymm5 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 5 * 32));
+ ymm6 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 6 * 32));
+ ymm7 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 7 * 32));
+ src = (const uint8_t *)src + 256;
+ _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 0 * 32), ymm0);
+ _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 1 * 32), ymm1);
+ _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 2 * 32), ymm2);
+ _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 3 * 32), ymm3);
+ _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 4 * 32), ymm4);
+ _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 5 * 32), ymm5);
+ _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 6 * 32), ymm6);
+ _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 7 * 32), ymm7);
+ dst = (uint8_t *)dst + 256;
+ }
+}
+
+static inline void *
+rte_memcpy(void *dst, const void *src, size_t n)
+{
+ uintptr_t dstu = (uintptr_t)dst;
+ uintptr_t srcu = (uintptr_t)src;
+ void *ret = dst;
+ size_t dstofss;
+ size_t bits;
+
+ /**
+ * Copy less than 16 bytes
+ */
+ if (n < 16) {
+ if (n & 0x01) {
+ *(uint8_t *)dstu = *(const uint8_t *)srcu;
+ srcu = (uintptr_t)((const uint8_t *)srcu + 1);
+ dstu = (uintptr_t)((uint8_t *)dstu + 1);
+ }
+ if (n & 0x02) {
+ *(uint16_t *)dstu = *(const uint16_t *)srcu;
+ srcu = (uintptr_t)((const uint16_t *)srcu + 1);
+ dstu = (uintptr_t)((uint16_t *)dstu + 1);
+ }
+ if (n & 0x04) {
+ *(uint32_t *)dstu = *(const uint32_t *)srcu;
+ srcu = (uintptr_t)((const uint32_t *)srcu + 1);
+ dstu = (uintptr_t)((uint32_t *)dstu + 1);
+ }
+ if (n & 0x08) {
+ *(uint64_t *)dstu = *(const uint64_t *)srcu;
+ }
+ return ret;
+ }
+
+ /**
+ * Fast way when copy size doesn't exceed 512 bytes
+ */
+ if (n <= 32) {
+ rte_mov16((uint8_t *)dst, (const uint8_t *)src);
+ rte_mov16((uint8_t *)dst - 16 + n, (const uint8_t *)src - 16 + n);
+ return ret;
+ }
+ if (n <= 64) {
+ rte_mov32((uint8_t *)dst, (const uint8_t *)src);
+ rte_mov32((uint8_t *)dst - 32 + n, (const uint8_t *)src - 32 + n);
+ return ret;
+ }
+ if (n <= 512) {
+ if (n >= 256) {
+ n -= 256;
+ rte_mov256((uint8_t *)dst, (const uint8_t *)src);
+ src = (const uint8_t *)src + 256;
+ dst = (uint8_t *)dst + 256;
+ }
+ if (n >= 128) {
+ n -= 128;
+ rte_mov128((uint8_t *)dst, (const uint8_t *)src);
+ src = (const uint8_t *)src + 128;
+ dst = (uint8_t *)dst + 128;
+ }
+ if (n >= 64) {
+ n -= 64;
+ rte_mov64((uint8_t *)dst, (const uint8_t *)src);
+ src = (const uint8_t *)src + 64;
+ dst = (uint8_t *)dst + 64;
+ }
+COPY_BLOCK_64_BACK31:
+ if (n > 32) {
+ rte_mov32((uint8_t *)dst, (const uint8_t *)src);
+ rte_mov32((uint8_t *)dst - 32 + n, (const uint8_t *)src - 32 + n);
+ return ret;
+ }
+ if (n > 0) {
+ rte_mov32((uint8_t *)dst - 32 + n, (const uint8_t *)src - 32 + n);
+ }
+ return ret;
+ }
+
+ /**
+ * Make store aligned when copy size exceeds 512 bytes
+ */
+ dstofss = (uintptr_t)dst & 0x1F;
+ if (dstofss > 0) {
+ dstofss = 32 - dstofss;
+ n -= dstofss;
+ rte_mov32((uint8_t *)dst, (const uint8_t *)src);
+ src = (const uint8_t *)src + dstofss;
+ dst = (uint8_t *)dst + dstofss;
+ }
+
+ /**
+ * Copy 256-byte blocks.
+ * Use copy block function for better instruction order control,
+ * which is important when load is unaligned.
+ */
+ rte_mov256blocks((uint8_t *)dst, (const uint8_t *)src, n);
+ bits = n;
+ n = n & 255;
+ bits -= n;
+ src = (const uint8_t *)src + bits;
+ dst = (uint8_t *)dst + bits;
+
+ /**
+ * Copy 64-byte blocks.
+ * Use copy block function for better instruction order control,
+ * which is important when load is unaligned.
+ */
+ if (n >= 64) {
+ rte_mov64blocks((uint8_t *)dst, (const uint8_t *)src, n);
+ bits = n;
+ n = n & 63;
+ bits -= n;
+ src = (const uint8_t *)src + bits;
+ dst = (uint8_t *)dst + bits;
+ }
+
+ /**
+ * Copy whatever left
+ */
+ goto COPY_BLOCK_64_BACK31;
+}
+
+#else /* RTE_MACHINE_CPUFLAG */
+
+/**
+ * SSE & AVX implementation below
+ */
+
+/**
+ * Copy 16 bytes from one location to another,
+ * locations should not overlap.
+ */
+static inline void
+rte_mov16(uint8_t *dst, const uint8_t *src)
+{
+ __m128i xmm0;
+
+ xmm0 = _mm_loadu_si128((const __m128i *)(const __m128i *)src);
+ _mm_storeu_si128((__m128i *)dst, xmm0);
+}
+
+/**
+ * Copy 32 bytes from one location to another,
+ * locations should not overlap.
+ */
+static inline void
+rte_mov32(uint8_t *dst, const uint8_t *src)
+{
+ rte_mov16((uint8_t *)dst + 0 * 16, (const uint8_t *)src + 0 * 16);
+ rte_mov16((uint8_t *)dst + 1 * 16, (const uint8_t *)src + 1 * 16);
+}
+
+/**
+ * Copy 64 bytes from one location to another,
+ * locations should not overlap.
+ */
+static inline void
+rte_mov64(uint8_t *dst, const uint8_t *src)
+{
+ rte_mov16((uint8_t *)dst + 0 * 16, (const uint8_t *)src + 0 * 16);
+ rte_mov16((uint8_t *)dst + 1 * 16, (const uint8_t *)src + 1 * 16);
+ rte_mov16((uint8_t *)dst + 2 * 16, (const uint8_t *)src + 2 * 16);
+ rte_mov16((uint8_t *)dst + 3 * 16, (const uint8_t *)src + 3 * 16);
+}
+
+/**
+ * Copy 128 bytes from one location to another,
+ * locations should not overlap.
+ */
+static inline void
+rte_mov128(uint8_t *dst, const uint8_t *src)
+{
+ rte_mov16((uint8_t *)dst + 0 * 16, (const uint8_t *)src + 0 * 16);
+ rte_mov16((uint8_t *)dst + 1 * 16, (const uint8_t *)src + 1 * 16);
+ rte_mov16((uint8_t *)dst + 2 * 16, (const uint8_t *)src + 2 * 16);
+ rte_mov16((uint8_t *)dst + 3 * 16, (const uint8_t *)src + 3 * 16);
+ rte_mov16((uint8_t *)dst + 4 * 16, (const uint8_t *)src + 4 * 16);
+ rte_mov16((uint8_t *)dst + 5 * 16, (const uint8_t *)src + 5 * 16);
+ rte_mov16((uint8_t *)dst + 6 * 16, (const uint8_t *)src + 6 * 16);
+ rte_mov16((uint8_t *)dst + 7 * 16, (const uint8_t *)src + 7 * 16);
+}
+
+/**
+ * Copy 256 bytes from one location to another,
+ * locations should not overlap.
+ */
+static inline void
+rte_mov256(uint8_t *dst, const uint8_t *src)
+{
+ rte_mov16((uint8_t *)dst + 0 * 16, (const uint8_t *)src + 0 * 16);
+ rte_mov16((uint8_t *)dst + 1 * 16, (const uint8_t *)src + 1 * 16);
+ rte_mov16((uint8_t *)dst + 2 * 16, (const uint8_t *)src + 2 * 16);
+ rte_mov16((uint8_t *)dst + 3 * 16, (const uint8_t *)src + 3 * 16);
+ rte_mov16((uint8_t *)dst + 4 * 16, (const uint8_t *)src + 4 * 16);
+ rte_mov16((uint8_t *)dst + 5 * 16, (const uint8_t *)src + 5 * 16);
+ rte_mov16((uint8_t *)dst + 6 * 16, (const uint8_t *)src + 6 * 16);
+ rte_mov16((uint8_t *)dst + 7 * 16, (const uint8_t *)src + 7 * 16);
+ rte_mov16((uint8_t *)dst + 8 * 16, (const uint8_t *)src + 8 * 16);
+ rte_mov16((uint8_t *)dst + 9 * 16, (const uint8_t *)src + 9 * 16);
+ rte_mov16((uint8_t *)dst + 10 * 16, (const uint8_t *)src + 10 * 16);
+ rte_mov16((uint8_t *)dst + 11 * 16, (const uint8_t *)src + 11 * 16);
+ rte_mov16((uint8_t *)dst + 12 * 16, (const uint8_t *)src + 12 * 16);
+ rte_mov16((uint8_t *)dst + 13 * 16, (const uint8_t *)src + 13 * 16);
+ rte_mov16((uint8_t *)dst + 14 * 16, (const uint8_t *)src + 14 * 16);
+ rte_mov16((uint8_t *)dst + 15 * 16, (const uint8_t *)src + 15 * 16);
+}
+
+/**
+ * Macro for copying unaligned block from one location to another with constant load offset,
+ * 47 bytes leftover maximum,
+ * locations should not overlap.
+ * Requirements:
+ * - Store is aligned
+ * - Load offset is <offset>, which must be immediate value within [1, 15]
+ * - For <src>, make sure <offset> bit backwards & <16 - offset> bit forwards are available for loading
+ * - <dst>, <src>, <len> must be variables
+ * - __m128i <xmm0> ~ <xmm8> must be pre-defined
+ */
+#define MOVEUNALIGNED_LEFT47_IMM(dst, src, len, offset) \
+({ \
+ int tmp; \
+ while (len >= 128 + 16 - offset) { \
+ xmm0 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 0 * 16)); \
+ len -= 128; \
+ xmm1 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 1 * 16)); \
+ xmm2 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 2 * 16)); \
+ xmm3 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 3 * 16)); \
+ xmm4 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 4 * 16)); \
+ xmm5 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 5 * 16)); \
+ xmm6 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 6 * 16)); \
+ xmm7 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 7 * 16)); \
+ xmm8 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 8 * 16)); \
+ src = (const uint8_t *)src + 128; \
+ _mm_storeu_si128((__m128i *)((uint8_t *)dst + 0 * 16), _mm_alignr_epi8(xmm1, xmm0, offset)); \
+ _mm_storeu_si128((__m128i *)((uint8_t *)dst + 1 * 16), _mm_alignr_epi8(xmm2, xmm1, offset)); \
+ _mm_storeu_si128((__m128i *)((uint8_t *)dst + 2 * 16), _mm_alignr_epi8(xmm3, xmm2, offset)); \
+ _mm_storeu_si128((__m128i *)((uint8_t *)dst + 3 * 16), _mm_alignr_epi8(xmm4, xmm3, offset)); \
+ _mm_storeu_si128((__m128i *)((uint8_t *)dst + 4 * 16), _mm_alignr_epi8(xmm5, xmm4, offset)); \
+ _mm_storeu_si128((__m128i *)((uint8_t *)dst + 5 * 16), _mm_alignr_epi8(xmm6, xmm5, offset)); \
+ _mm_storeu_si128((__m128i *)((uint8_t *)dst + 6 * 16), _mm_alignr_epi8(xmm7, xmm6, offset)); \
+ _mm_storeu_si128((__m128i *)((uint8_t *)dst + 7 * 16), _mm_alignr_epi8(xmm8, xmm7, offset)); \
+ dst = (uint8_t *)dst + 128; \
+ } \
+ tmp = len; \
+ len = ((len - 16 + offset) & 127) + 16 - offset; \
+ tmp -= len; \
+ src = (const uint8_t *)src + tmp; \
+ dst = (uint8_t *)dst + tmp; \
+ if (len >= 32 + 16 - offset) { \
+ while (len >= 32 + 16 - offset) { \
+ xmm0 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 0 * 16)); \
+ len -= 32; \
+ xmm1 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 1 * 16)); \
+ xmm2 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 2 * 16)); \
+ src = (const uint8_t *)src + 32; \
+ _mm_storeu_si128((__m128i *)((uint8_t *)dst + 0 * 16), _mm_alignr_epi8(xmm1, xmm0, offset)); \
+ _mm_storeu_si128((__m128i *)((uint8_t *)dst + 1 * 16), _mm_alignr_epi8(xmm2, xmm1, offset)); \
+ dst = (uint8_t *)dst + 32; \
+ } \
+ tmp = len; \
+ len = ((len - 16 + offset) & 31) + 16 - offset; \
+ tmp -= len; \
+ src = (const uint8_t *)src + tmp; \
+ dst = (uint8_t *)dst + tmp; \
+ } \
+})
+
+/**
+ * Macro for copying unaligned block from one location to another,
+ * 47 bytes leftover maximum,
+ * locations should not overlap.
+ * Use switch here because the aligning instruction requires immediate value for shift count.
+ * Requirements:
+ * - Store is aligned
+ * - Load offset is <offset>, which must be within [1, 15]
+ * - For <src>, make sure <offset> bit backwards & <16 - offset> bit forwards are available for loading
+ * - <dst>, <src>, <len> must be variables
+ * - __m128i <xmm0> ~ <xmm8> used in MOVEUNALIGNED_LEFT47_IMM must be pre-defined
+ */
+#define MOVEUNALIGNED_LEFT47(dst, src, len, offset) \
+({ \
+ switch (offset) { \
+ case 0x01: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x01); break; \
+ case 0x02: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x02); break; \
+ case 0x03: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x03); break; \
+ case 0x04: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x04); break; \
+ case 0x05: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x05); break; \
+ case 0x06: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x06); break; \
+ case 0x07: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x07); break; \
+ case 0x08: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x08); break; \
+ case 0x09: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x09); break; \
+ case 0x0A: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x0A); break; \
+ case 0x0B: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x0B); break; \
+ case 0x0C: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x0C); break; \
+ case 0x0D: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x0D); break; \
+ case 0x0E: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x0E); break; \
+ case 0x0F: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x0F); break; \
+ default:; \
+ } \
+})
+
+static inline void *
+rte_memcpy(void *dst, const void *src, size_t n)
+{
+ __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
+ uintptr_t dstu = (uintptr_t)dst;
+ uintptr_t srcu = (uintptr_t)src;
+ void *ret = dst;
+ size_t dstofss;
+ size_t srcofs;
+
+ /**
+ * Copy less than 16 bytes
+ */
+ if (n < 16) {
+ if (n & 0x01) {
+ *(uint8_t *)dstu = *(const uint8_t *)srcu;
+ srcu = (uintptr_t)((const uint8_t *)srcu + 1);
+ dstu = (uintptr_t)((uint8_t *)dstu + 1);
+ }
+ if (n & 0x02) {
+ *(uint16_t *)dstu = *(const uint16_t *)srcu;
+ srcu = (uintptr_t)((const uint16_t *)srcu + 1);
+ dstu = (uintptr_t)((uint16_t *)dstu + 1);
+ }
+ if (n & 0x04) {
+ *(uint32_t *)dstu = *(const uint32_t *)srcu;
+ srcu = (uintptr_t)((const uint32_t *)srcu + 1);
+ dstu = (uintptr_t)((uint32_t *)dstu + 1);
+ }
+ if (n & 0x08) {
+ *(uint64_t *)dstu = *(const uint64_t *)srcu;
+ }
+ return ret;
+ }
+
+ /**
+ * Fast way when copy size doesn't exceed 512 bytes
+ */
+ if (n <= 32) {
+ rte_mov16((uint8_t *)dst, (const uint8_t *)src);
+ rte_mov16((uint8_t *)dst - 16 + n, (const uint8_t *)src - 16 + n);
+ return ret;
+ }
+ if (n <= 48) {
+ rte_mov32((uint8_t *)dst, (const uint8_t *)src);
+ rte_mov16((uint8_t *)dst - 16 + n, (const uint8_t *)src - 16 + n);
+ return ret;
+ }
+ if (n <= 64) {
+ rte_mov32((uint8_t *)dst, (const uint8_t *)src);
+ rte_mov16((uint8_t *)dst + 32, (const uint8_t *)src + 32);
+ rte_mov16((uint8_t *)dst - 16 + n, (const uint8_t *)src - 16 + n);
+ return ret;
+ }
+ if (n <= 128) {
+ goto COPY_BLOCK_128_BACK15;
+ }
+ if (n <= 512) {
+ if (n >= 256) {
+ n -= 256;
+ rte_mov128((uint8_t *)dst, (const uint8_t *)src);
+ rte_mov128((uint8_t *)dst + 128, (const uint8_t *)src + 128);
+ src = (const uint8_t *)src + 256;
+ dst = (uint8_t *)dst + 256;
+ }
+COPY_BLOCK_255_BACK15:
+ if (n >= 128) {
+ n -= 128;
+ rte_mov128((uint8_t *)dst, (const uint8_t *)src);
+ src = (const uint8_t *)src + 128;
+ dst = (uint8_t *)dst + 128;
+ }
+COPY_BLOCK_128_BACK15:
+ if (n >= 64) {
+ n -= 64;
+ rte_mov64((uint8_t *)dst, (const uint8_t *)src);
+ src = (const uint8_t *)src + 64;
+ dst = (uint8_t *)dst + 64;
+ }
+COPY_BLOCK_64_BACK15:
+ if (n >= 32) {
+ n -= 32;
+ rte_mov32((uint8_t *)dst, (const uint8_t *)src);
+ src = (const uint8_t *)src + 32;
+ dst = (uint8_t *)dst + 32;
+ }
+ if (n > 16) {
+ rte_mov16((uint8_t *)dst, (const uint8_t *)src);
+ rte_mov16((uint8_t *)dst - 16 + n, (const uint8_t *)src - 16 + n);
+ return ret;
+ }
+ if (n > 0) {
+ rte_mov16((uint8_t *)dst - 16 + n, (const uint8_t *)src - 16 + n);
+ }
+ return ret;
+ }
+
+ /**
+ * Make store aligned when copy size exceeds 512 bytes,
+ * and make sure the first 15 bytes are copied, because
+ * unaligned copy functions require up to 15 bytes
+ * backwards access.
+ */
+ dstofss = (uintptr_t)dst & 0x0F;
+ if (dstofss > 0) {
+ dstofss = 16 - dstofss + 16;
+ n -= dstofss;
+ rte_mov32((uint8_t *)dst, (const uint8_t *)src);
+ src = (const uint8_t *)src + dstofss;
+ dst = (uint8_t *)dst + dstofss;
+ }
+ srcofs = ((uintptr_t)src & 0x0F);
+
+ /**
+ * For aligned copy
+ */
+ if (srcofs == 0) {
+ /**
+ * Copy 256-byte blocks
+ */
+ for (; n >= 256; n -= 256) {
+ rte_mov256((uint8_t *)dst, (const uint8_t *)src);
+ dst = (uint8_t *)dst + 256;
+ src = (const uint8_t *)src + 256;
+ }
+
+ /**
+ * Copy whatever left
+ */
+ goto COPY_BLOCK_255_BACK15;
+ }
+
+ /**
+ * For copy with unaligned load
+ */
+ MOVEUNALIGNED_LEFT47(dst, src, n, srcofs);
+
+ /**
+ * Copy whatever left
+ */
+ goto COPY_BLOCK_64_BACK15;
+}
+
+#endif /* RTE_MACHINE_CPUFLAG */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MEMCPY_X86_64_H_ */
diff --git a/lib/librte_eal/common/include/arch/x86/rte_prefetch.h b/lib/librte_eal/common/include/arch/x86/rte_prefetch.h
new file mode 100644
index 00000000..5dac47eb
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/x86/rte_prefetch.h
@@ -0,0 +1,67 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_PREFETCH_X86_64_H_
+#define _RTE_PREFETCH_X86_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_prefetch.h"
+
+static inline void rte_prefetch0(const volatile void *p)
+{
+ asm volatile ("prefetcht0 %[p]" : : [p] "m" (*(const volatile char *)p));
+}
+
+static inline void rte_prefetch1(const volatile void *p)
+{
+ asm volatile ("prefetcht1 %[p]" : : [p] "m" (*(const volatile char *)p));
+}
+
+static inline void rte_prefetch2(const volatile void *p)
+{
+ asm volatile ("prefetcht2 %[p]" : : [p] "m" (*(const volatile char *)p));
+}
+
+static inline void rte_prefetch_non_temporal(const volatile void *p)
+{
+ asm volatile ("prefetchnta %[p]" : : [p] "m" (*(const volatile char *)p));
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_PREFETCH_X86_64_H_ */
diff --git a/lib/librte_eal/common/include/arch/x86/rte_rtm.h b/lib/librte_eal/common/include/arch/x86/rte_rtm.h
new file mode 100644
index 00000000..d9356419
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/x86/rte_rtm.h
@@ -0,0 +1,73 @@
+#ifndef _RTE_RTM_H_
+#define _RTE_RTM_H_ 1
+
+/*
+ * Copyright (c) 2012,2013 Intel Corporation
+ * Author: Andi Kleen
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that: (1) source code distributions
+ * retain the above copyright notice and this paragraph in its entirety, (2)
+ * distributions including binary code include the above copyright notice and
+ * this paragraph in its entirety in the documentation or other materials
+ * provided with the distribution
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+/* Official RTM intrinsics interface matching gcc/icc, but works
+ on older gcc compatible compilers and binutils. */
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+#define RTE_XBEGIN_STARTED (~0u)
+#define RTE_XABORT_EXPLICIT (1 << 0)
+#define RTE_XABORT_RETRY (1 << 1)
+#define RTE_XABORT_CONFLICT (1 << 2)
+#define RTE_XABORT_CAPACITY (1 << 3)
+#define RTE_XABORT_DEBUG (1 << 4)
+#define RTE_XABORT_NESTED (1 << 5)
+#define RTE_XABORT_CODE(x) (((x) >> 24) & 0xff)
+
+static __attribute__((__always_inline__)) inline
+unsigned int rte_xbegin(void)
+{
+ unsigned int ret = RTE_XBEGIN_STARTED;
+
+ asm volatile(".byte 0xc7,0xf8 ; .long 0" : "+a" (ret) :: "memory");
+ return ret;
+}
+
+static __attribute__((__always_inline__)) inline
+void rte_xend(void)
+{
+ asm volatile(".byte 0x0f,0x01,0xd5" ::: "memory");
+}
+
+static __attribute__((__always_inline__)) inline
+void rte_xabort(const unsigned int status)
+{
+ asm volatile(".byte 0xc6,0xf8,%P0" :: "i" (status) : "memory");
+}
+
+static __attribute__((__always_inline__)) inline
+int rte_xtest(void)
+{
+ unsigned char out;
+
+ asm volatile(".byte 0x0f,0x01,0xd6 ; setnz %0" :
+ "=r" (out) :: "memory");
+ return out;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_RTM_H_ */
diff --git a/lib/librte_eal/common/include/arch/x86/rte_rwlock.h b/lib/librte_eal/common/include/arch/x86/rte_rwlock.h
new file mode 100644
index 00000000..afd1c3c2
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/x86/rte_rwlock.h
@@ -0,0 +1,82 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_RWLOCK_X86_64_H_
+#define _RTE_RWLOCK_X86_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_rwlock.h"
+#include "rte_spinlock.h"
+
+static inline void
+rte_rwlock_read_lock_tm(rte_rwlock_t *rwl)
+{
+ if (likely(rte_try_tm(&rwl->cnt)))
+ return;
+ rte_rwlock_read_lock(rwl);
+}
+
+static inline void
+rte_rwlock_read_unlock_tm(rte_rwlock_t *rwl)
+{
+ if (unlikely(rwl->cnt))
+ rte_rwlock_read_unlock(rwl);
+ else
+ rte_xend();
+}
+
+static inline void
+rte_rwlock_write_lock_tm(rte_rwlock_t *rwl)
+{
+ if (likely(rte_try_tm(&rwl->cnt)))
+ return;
+ rte_rwlock_write_lock(rwl);
+}
+
+static inline void
+rte_rwlock_write_unlock_tm(rte_rwlock_t *rwl)
+{
+ if (unlikely(rwl->cnt))
+ rte_rwlock_write_unlock(rwl);
+ else
+ rte_xend();
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_RWLOCK_X86_64_H_ */
diff --git a/lib/librte_eal/common/include/arch/x86/rte_spinlock.h b/lib/librte_eal/common/include/arch/x86/rte_spinlock.h
new file mode 100644
index 00000000..02f95cbb
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/x86/rte_spinlock.h
@@ -0,0 +1,201 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_SPINLOCK_X86_64_H_
+#define _RTE_SPINLOCK_X86_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_spinlock.h"
+#include "rte_rtm.h"
+#include "rte_cpuflags.h"
+#include "rte_branch_prediction.h"
+#include "rte_common.h"
+
+#define RTE_RTM_MAX_RETRIES (10)
+#define RTE_XABORT_LOCK_BUSY (0xff)
+
+#ifndef RTE_FORCE_INTRINSICS
+static inline void
+rte_spinlock_lock(rte_spinlock_t *sl)
+{
+ int lock_val = 1;
+ asm volatile (
+ "1:\n"
+ "xchg %[locked], %[lv]\n"
+ "test %[lv], %[lv]\n"
+ "jz 3f\n"
+ "2:\n"
+ "pause\n"
+ "cmpl $0, %[locked]\n"
+ "jnz 2b\n"
+ "jmp 1b\n"
+ "3:\n"
+ : [locked] "=m" (sl->locked), [lv] "=q" (lock_val)
+ : "[lv]" (lock_val)
+ : "memory");
+}
+
+static inline void
+rte_spinlock_unlock (rte_spinlock_t *sl)
+{
+ int unlock_val = 0;
+ asm volatile (
+ "xchg %[locked], %[ulv]\n"
+ : [locked] "=m" (sl->locked), [ulv] "=q" (unlock_val)
+ : "[ulv]" (unlock_val)
+ : "memory");
+}
+
+static inline int
+rte_spinlock_trylock (rte_spinlock_t *sl)
+{
+ int lockval = 1;
+
+ asm volatile (
+ "xchg %[locked], %[lockval]"
+ : [locked] "=m" (sl->locked), [lockval] "=q" (lockval)
+ : "[lockval]" (lockval)
+ : "memory");
+
+ return lockval == 0;
+}
+#endif
+
+static uint8_t rtm_supported; /* cache the flag to avoid the overhead
+ of the rte_cpu_get_flag_enabled function */
+
+static inline void __attribute__((constructor))
+rte_rtm_init(void)
+{
+ rtm_supported = rte_cpu_get_flag_enabled(RTE_CPUFLAG_RTM);
+}
+
+static inline int rte_tm_supported(void)
+{
+ return rtm_supported;
+}
+
+static inline int
+rte_try_tm(volatile int *lock)
+{
+ if (!rtm_supported)
+ return 0;
+
+ int retries = RTE_RTM_MAX_RETRIES;
+
+ while (likely(retries--)) {
+
+ unsigned int status = rte_xbegin();
+
+ if (likely(RTE_XBEGIN_STARTED == status)) {
+ if (unlikely(*lock))
+ rte_xabort(RTE_XABORT_LOCK_BUSY);
+ else
+ return 1;
+ }
+ while (*lock)
+ rte_pause();
+
+ if ((status & RTE_XABORT_EXPLICIT) &&
+ (RTE_XABORT_CODE(status) == RTE_XABORT_LOCK_BUSY))
+ continue;
+
+ if ((status & RTE_XABORT_RETRY) == 0) /* do not retry */
+ break;
+ }
+ return 0;
+}
+
+static inline void
+rte_spinlock_lock_tm(rte_spinlock_t *sl)
+{
+ if (likely(rte_try_tm(&sl->locked)))
+ return;
+
+ rte_spinlock_lock(sl); /* fall-back */
+}
+
+static inline int
+rte_spinlock_trylock_tm(rte_spinlock_t *sl)
+{
+ if (likely(rte_try_tm(&sl->locked)))
+ return 1;
+
+ return rte_spinlock_trylock(sl);
+}
+
+static inline void
+rte_spinlock_unlock_tm(rte_spinlock_t *sl)
+{
+ if (unlikely(sl->locked))
+ rte_spinlock_unlock(sl);
+ else
+ rte_xend();
+}
+
+static inline void
+rte_spinlock_recursive_lock_tm(rte_spinlock_recursive_t *slr)
+{
+ if (likely(rte_try_tm(&slr->sl.locked)))
+ return;
+
+ rte_spinlock_recursive_lock(slr); /* fall-back */
+}
+
+static inline void
+rte_spinlock_recursive_unlock_tm(rte_spinlock_recursive_t *slr)
+{
+ if (unlikely(slr->sl.locked))
+ rte_spinlock_recursive_unlock(slr);
+ else
+ rte_xend();
+}
+
+static inline int
+rte_spinlock_recursive_trylock_tm(rte_spinlock_recursive_t *slr)
+{
+ if (likely(rte_try_tm(&slr->sl.locked)))
+ return 1;
+
+ return rte_spinlock_recursive_trylock(slr);
+}
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_SPINLOCK_X86_64_H_ */
diff --git a/lib/librte_eal/common/include/arch/x86/rte_vect.h b/lib/librte_eal/common/include/arch/x86/rte_vect.h
new file mode 100644
index 00000000..b698797c
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/x86/rte_vect.h
@@ -0,0 +1,132 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_VECT_H_
+#define _RTE_VECT_H_
+
+/**
+ * @file
+ *
+ * RTE SSE/AVX related header.
+ */
+
+#if (defined(__ICC) || (__GNUC__ == 4 && __GNUC_MINOR__ < 4))
+
+#ifdef __SSE__
+#include <xmmintrin.h>
+#endif
+
+#ifdef __SSE2__
+#include <emmintrin.h>
+#endif
+
+#ifdef __SSE3__
+#include <tmmintrin.h>
+#endif
+
+#if defined(__SSE4_2__) || defined(__SSE4_1__)
+#include <smmintrin.h>
+#endif
+
+#if defined(__AVX__)
+#include <immintrin.h>
+#endif
+
+#else
+
+#include <x86intrin.h>
+
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef __m128i xmm_t;
+
+#define XMM_SIZE (sizeof(xmm_t))
+#define XMM_MASK (XMM_SIZE - 1)
+
+typedef union rte_xmm {
+ xmm_t x;
+ uint8_t u8[XMM_SIZE / sizeof(uint8_t)];
+ uint16_t u16[XMM_SIZE / sizeof(uint16_t)];
+ uint32_t u32[XMM_SIZE / sizeof(uint32_t)];
+ uint64_t u64[XMM_SIZE / sizeof(uint64_t)];
+ double pd[XMM_SIZE / sizeof(double)];
+} rte_xmm_t;
+
+#ifdef __AVX__
+
+typedef __m256i ymm_t;
+
+#define YMM_SIZE (sizeof(ymm_t))
+#define YMM_MASK (YMM_SIZE - 1)
+
+typedef union rte_ymm {
+ ymm_t y;
+ xmm_t x[YMM_SIZE / sizeof(xmm_t)];
+ uint8_t u8[YMM_SIZE / sizeof(uint8_t)];
+ uint16_t u16[YMM_SIZE / sizeof(uint16_t)];
+ uint32_t u32[YMM_SIZE / sizeof(uint32_t)];
+ uint64_t u64[YMM_SIZE / sizeof(uint64_t)];
+ double pd[YMM_SIZE / sizeof(double)];
+} rte_ymm_t;
+
+#endif /* __AVX__ */
+
+#ifdef RTE_ARCH_I686
+#define _mm_cvtsi128_si64(a) ({ \
+ rte_xmm_t m; \
+ m.x = (a); \
+ (m.u64[0]); \
+})
+#endif
+
+/*
+ * Prior to version 12.1 icc doesn't support _mm_set_epi64x.
+ */
+#if (defined(__ICC) && __ICC < 1210)
+#define _mm_set_epi64x(a, b) ({ \
+ rte_xmm_t m; \
+ m.u64[0] = b; \
+ m.u64[1] = a; \
+ (m.x); \
+})
+#endif /* (defined(__ICC) && __ICC < 1210) */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_VECT_H_ */
diff --git a/lib/librte_eal/common/include/generic/rte_atomic.h b/lib/librte_eal/common/include/generic/rte_atomic.h
new file mode 100644
index 00000000..bfb4fe44
--- /dev/null
+++ b/lib/librte_eal/common/include/generic/rte_atomic.h
@@ -0,0 +1,945 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_ATOMIC_H_
+#define _RTE_ATOMIC_H_
+
+/**
+ * @file
+ * Atomic Operations
+ *
+ * This file defines a generic API for atomic operations.
+ */
+
+#include <stdint.h>
+
+#ifdef __DOXYGEN__
+
+/**
+ * General memory barrier.
+ *
+ * Guarantees that the LOAD and STORE operations generated before the
+ * barrier occur before the LOAD and STORE operations generated after.
+ * This function is architecture dependent.
+ */
+static inline void rte_mb(void);
+
+/**
+ * Write memory barrier.
+ *
+ * Guarantees that the STORE operations generated before the barrier
+ * occur before the STORE operations generated after.
+ * This function is architecture dependent.
+ */
+static inline void rte_wmb(void);
+
+/**
+ * Read memory barrier.
+ *
+ * Guarantees that the LOAD operations generated before the barrier
+ * occur before the LOAD operations generated after.
+ * This function is architecture dependent.
+ */
+static inline void rte_rmb(void);
+
+/**
+ * General memory barrier between lcores
+ *
+ * Guarantees that the LOAD and STORE operations that precede the
+ * rte_smp_mb() call are globally visible across the lcores
+ * before the the LOAD and STORE operations that follows it.
+ */
+static inline void rte_smp_mb(void);
+
+/**
+ * Write memory barrier between lcores
+ *
+ * Guarantees that the STORE operations that precede the
+ * rte_smp_wmb() call are globally visible across the lcores
+ * before the the STORE operations that follows it.
+ */
+static inline void rte_smp_wmb(void);
+
+/**
+ * Read memory barrier between lcores
+ *
+ * Guarantees that the LOAD operations that precede the
+ * rte_smp_rmb() call are globally visible across the lcores
+ * before the the LOAD operations that follows it.
+ */
+static inline void rte_smp_rmb(void);
+
+#endif /* __DOXYGEN__ */
+
+/**
+ * Compiler barrier.
+ *
+ * Guarantees that operation reordering does not occur at compile time
+ * for operations directly before and after the barrier.
+ */
+#define rte_compiler_barrier() do { \
+ asm volatile ("" : : : "memory"); \
+} while(0)
+
+/*------------------------- 16 bit atomic operations -------------------------*/
+
+/**
+ * Atomic compare and set.
+ *
+ * (atomic) equivalent to:
+ * if (*dst == exp)
+ * *dst = src (all 16-bit words)
+ *
+ * @param dst
+ * The destination location into which the value will be written.
+ * @param exp
+ * The expected value.
+ * @param src
+ * The new value.
+ * @return
+ * Non-zero on success; 0 on failure.
+ */
+static inline int
+rte_atomic16_cmpset(volatile uint16_t *dst, uint16_t exp, uint16_t src);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline int
+rte_atomic16_cmpset(volatile uint16_t *dst, uint16_t exp, uint16_t src)
+{
+ return __sync_bool_compare_and_swap(dst, exp, src);
+}
+#endif
+
+/**
+ * The atomic counter structure.
+ */
+typedef struct {
+ volatile int16_t cnt; /**< An internal counter value. */
+} rte_atomic16_t;
+
+/**
+ * Static initializer for an atomic counter.
+ */
+#define RTE_ATOMIC16_INIT(val) { (val) }
+
+/**
+ * Initialize an atomic counter.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ */
+static inline void
+rte_atomic16_init(rte_atomic16_t *v)
+{
+ v->cnt = 0;
+}
+
+/**
+ * Atomically read a 16-bit value from a counter.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ * @return
+ * The value of the counter.
+ */
+static inline int16_t
+rte_atomic16_read(const rte_atomic16_t *v)
+{
+ return v->cnt;
+}
+
+/**
+ * Atomically set a counter to a 16-bit value.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ * @param new_value
+ * The new value for the counter.
+ */
+static inline void
+rte_atomic16_set(rte_atomic16_t *v, int16_t new_value)
+{
+ v->cnt = new_value;
+}
+
+/**
+ * Atomically add a 16-bit value to an atomic counter.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ * @param inc
+ * The value to be added to the counter.
+ */
+static inline void
+rte_atomic16_add(rte_atomic16_t *v, int16_t inc)
+{
+ __sync_fetch_and_add(&v->cnt, inc);
+}
+
+/**
+ * Atomically subtract a 16-bit value from an atomic counter.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ * @param dec
+ * The value to be subtracted from the counter.
+ */
+static inline void
+rte_atomic16_sub(rte_atomic16_t *v, int16_t dec)
+{
+ __sync_fetch_and_sub(&v->cnt, dec);
+}
+
+/**
+ * Atomically increment a counter by one.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ */
+static inline void
+rte_atomic16_inc(rte_atomic16_t *v);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline void
+rte_atomic16_inc(rte_atomic16_t *v)
+{
+ rte_atomic16_add(v, 1);
+}
+#endif
+
+/**
+ * Atomically decrement a counter by one.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ */
+static inline void
+rte_atomic16_dec(rte_atomic16_t *v);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline void
+rte_atomic16_dec(rte_atomic16_t *v)
+{
+ rte_atomic16_sub(v, 1);
+}
+#endif
+
+/**
+ * Atomically add a 16-bit value to a counter and return the result.
+ *
+ * Atomically adds the 16-bits value (inc) to the atomic counter (v) and
+ * returns the value of v after addition.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ * @param inc
+ * The value to be added to the counter.
+ * @return
+ * The value of v after the addition.
+ */
+static inline int16_t
+rte_atomic16_add_return(rte_atomic16_t *v, int16_t inc)
+{
+ return __sync_add_and_fetch(&v->cnt, inc);
+}
+
+/**
+ * Atomically subtract a 16-bit value from a counter and return
+ * the result.
+ *
+ * Atomically subtracts the 16-bit value (inc) from the atomic counter
+ * (v) and returns the value of v after the subtraction.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ * @param dec
+ * The value to be subtracted from the counter.
+ * @return
+ * The value of v after the subtraction.
+ */
+static inline int16_t
+rte_atomic16_sub_return(rte_atomic16_t *v, int16_t dec)
+{
+ return __sync_sub_and_fetch(&v->cnt, dec);
+}
+
+/**
+ * Atomically increment a 16-bit counter by one and test.
+ *
+ * Atomically increments the atomic counter (v) by one and returns true if
+ * the result is 0, or false in all other cases.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ * @return
+ * True if the result after the increment operation is 0; false otherwise.
+ */
+static inline int rte_atomic16_inc_and_test(rte_atomic16_t *v);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline int rte_atomic16_inc_and_test(rte_atomic16_t *v)
+{
+ return __sync_add_and_fetch(&v->cnt, 1) == 0;
+}
+#endif
+
+/**
+ * Atomically decrement a 16-bit counter by one and test.
+ *
+ * Atomically decrements the atomic counter (v) by one and returns true if
+ * the result is 0, or false in all other cases.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ * @return
+ * True if the result after the decrement operation is 0; false otherwise.
+ */
+static inline int rte_atomic16_dec_and_test(rte_atomic16_t *v);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline int rte_atomic16_dec_and_test(rte_atomic16_t *v)
+{
+ return __sync_sub_and_fetch(&v->cnt, 1) == 0;
+}
+#endif
+
+/**
+ * Atomically test and set a 16-bit atomic counter.
+ *
+ * If the counter value is already set, return 0 (failed). Otherwise, set
+ * the counter value to 1 and return 1 (success).
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ * @return
+ * 0 if failed; else 1, success.
+ */
+static inline int rte_atomic16_test_and_set(rte_atomic16_t *v);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline int rte_atomic16_test_and_set(rte_atomic16_t *v)
+{
+ return rte_atomic16_cmpset((volatile uint16_t *)&v->cnt, 0, 1);
+}
+#endif
+
+/**
+ * Atomically set a 16-bit counter to 0.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ */
+static inline void rte_atomic16_clear(rte_atomic16_t *v)
+{
+ v->cnt = 0;
+}
+
+/*------------------------- 32 bit atomic operations -------------------------*/
+
+/**
+ * Atomic compare and set.
+ *
+ * (atomic) equivalent to:
+ * if (*dst == exp)
+ * *dst = src (all 32-bit words)
+ *
+ * @param dst
+ * The destination location into which the value will be written.
+ * @param exp
+ * The expected value.
+ * @param src
+ * The new value.
+ * @return
+ * Non-zero on success; 0 on failure.
+ */
+static inline int
+rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline int
+rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src)
+{
+ return __sync_bool_compare_and_swap(dst, exp, src);
+}
+#endif
+
+/**
+ * The atomic counter structure.
+ */
+typedef struct {
+ volatile int32_t cnt; /**< An internal counter value. */
+} rte_atomic32_t;
+
+/**
+ * Static initializer for an atomic counter.
+ */
+#define RTE_ATOMIC32_INIT(val) { (val) }
+
+/**
+ * Initialize an atomic counter.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ */
+static inline void
+rte_atomic32_init(rte_atomic32_t *v)
+{
+ v->cnt = 0;
+}
+
+/**
+ * Atomically read a 32-bit value from a counter.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ * @return
+ * The value of the counter.
+ */
+static inline int32_t
+rte_atomic32_read(const rte_atomic32_t *v)
+{
+ return v->cnt;
+}
+
+/**
+ * Atomically set a counter to a 32-bit value.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ * @param new_value
+ * The new value for the counter.
+ */
+static inline void
+rte_atomic32_set(rte_atomic32_t *v, int32_t new_value)
+{
+ v->cnt = new_value;
+}
+
+/**
+ * Atomically add a 32-bit value to an atomic counter.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ * @param inc
+ * The value to be added to the counter.
+ */
+static inline void
+rte_atomic32_add(rte_atomic32_t *v, int32_t inc)
+{
+ __sync_fetch_and_add(&v->cnt, inc);
+}
+
+/**
+ * Atomically subtract a 32-bit value from an atomic counter.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ * @param dec
+ * The value to be subtracted from the counter.
+ */
+static inline void
+rte_atomic32_sub(rte_atomic32_t *v, int32_t dec)
+{
+ __sync_fetch_and_sub(&v->cnt, dec);
+}
+
+/**
+ * Atomically increment a counter by one.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ */
+static inline void
+rte_atomic32_inc(rte_atomic32_t *v);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline void
+rte_atomic32_inc(rte_atomic32_t *v)
+{
+ rte_atomic32_add(v, 1);
+}
+#endif
+
+/**
+ * Atomically decrement a counter by one.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ */
+static inline void
+rte_atomic32_dec(rte_atomic32_t *v);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline void
+rte_atomic32_dec(rte_atomic32_t *v)
+{
+ rte_atomic32_sub(v,1);
+}
+#endif
+
+/**
+ * Atomically add a 32-bit value to a counter and return the result.
+ *
+ * Atomically adds the 32-bits value (inc) to the atomic counter (v) and
+ * returns the value of v after addition.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ * @param inc
+ * The value to be added to the counter.
+ * @return
+ * The value of v after the addition.
+ */
+static inline int32_t
+rte_atomic32_add_return(rte_atomic32_t *v, int32_t inc)
+{
+ return __sync_add_and_fetch(&v->cnt, inc);
+}
+
+/**
+ * Atomically subtract a 32-bit value from a counter and return
+ * the result.
+ *
+ * Atomically subtracts the 32-bit value (inc) from the atomic counter
+ * (v) and returns the value of v after the subtraction.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ * @param dec
+ * The value to be subtracted from the counter.
+ * @return
+ * The value of v after the subtraction.
+ */
+static inline int32_t
+rte_atomic32_sub_return(rte_atomic32_t *v, int32_t dec)
+{
+ return __sync_sub_and_fetch(&v->cnt, dec);
+}
+
+/**
+ * Atomically increment a 32-bit counter by one and test.
+ *
+ * Atomically increments the atomic counter (v) by one and returns true if
+ * the result is 0, or false in all other cases.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ * @return
+ * True if the result after the increment operation is 0; false otherwise.
+ */
+static inline int rte_atomic32_inc_and_test(rte_atomic32_t *v);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline int rte_atomic32_inc_and_test(rte_atomic32_t *v)
+{
+ return __sync_add_and_fetch(&v->cnt, 1) == 0;
+}
+#endif
+
+/**
+ * Atomically decrement a 32-bit counter by one and test.
+ *
+ * Atomically decrements the atomic counter (v) by one and returns true if
+ * the result is 0, or false in all other cases.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ * @return
+ * True if the result after the decrement operation is 0; false otherwise.
+ */
+static inline int rte_atomic32_dec_and_test(rte_atomic32_t *v);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline int rte_atomic32_dec_and_test(rte_atomic32_t *v)
+{
+ return __sync_sub_and_fetch(&v->cnt, 1) == 0;
+}
+#endif
+
+/**
+ * Atomically test and set a 32-bit atomic counter.
+ *
+ * If the counter value is already set, return 0 (failed). Otherwise, set
+ * the counter value to 1 and return 1 (success).
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ * @return
+ * 0 if failed; else 1, success.
+ */
+static inline int rte_atomic32_test_and_set(rte_atomic32_t *v);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline int rte_atomic32_test_and_set(rte_atomic32_t *v)
+{
+ return rte_atomic32_cmpset((volatile uint32_t *)&v->cnt, 0, 1);
+}
+#endif
+
+/**
+ * Atomically set a 32-bit counter to 0.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ */
+static inline void rte_atomic32_clear(rte_atomic32_t *v)
+{
+ v->cnt = 0;
+}
+
+/*------------------------- 64 bit atomic operations -------------------------*/
+
+/**
+ * An atomic compare and set function used by the mutex functions.
+ * (atomic) equivalent to:
+ * if (*dst == exp)
+ * *dst = src (all 64-bit words)
+ *
+ * @param dst
+ * The destination into which the value will be written.
+ * @param exp
+ * The expected value.
+ * @param src
+ * The new value.
+ * @return
+ * Non-zero on success; 0 on failure.
+ */
+static inline int
+rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline int
+rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src)
+{
+ return __sync_bool_compare_and_swap(dst, exp, src);
+}
+#endif
+
+/**
+ * The atomic counter structure.
+ */
+typedef struct {
+ volatile int64_t cnt; /**< Internal counter value. */
+} rte_atomic64_t;
+
+/**
+ * Static initializer for an atomic counter.
+ */
+#define RTE_ATOMIC64_INIT(val) { (val) }
+
+/**
+ * Initialize the atomic counter.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ */
+static inline void
+rte_atomic64_init(rte_atomic64_t *v);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline void
+rte_atomic64_init(rte_atomic64_t *v)
+{
+#ifdef __LP64__
+ v->cnt = 0;
+#else
+ int success = 0;
+ uint64_t tmp;
+
+ while (success == 0) {
+ tmp = v->cnt;
+ success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt,
+ tmp, 0);
+ }
+#endif
+}
+#endif
+
+/**
+ * Atomically read a 64-bit counter.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ * @return
+ * The value of the counter.
+ */
+static inline int64_t
+rte_atomic64_read(rte_atomic64_t *v);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline int64_t
+rte_atomic64_read(rte_atomic64_t *v)
+{
+#ifdef __LP64__
+ return v->cnt;
+#else
+ int success = 0;
+ uint64_t tmp;
+
+ while (success == 0) {
+ tmp = v->cnt;
+ /* replace the value by itself */
+ success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt,
+ tmp, tmp);
+ }
+ return tmp;
+#endif
+}
+#endif
+
+/**
+ * Atomically set a 64-bit counter.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ * @param new_value
+ * The new value of the counter.
+ */
+static inline void
+rte_atomic64_set(rte_atomic64_t *v, int64_t new_value);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline void
+rte_atomic64_set(rte_atomic64_t *v, int64_t new_value)
+{
+#ifdef __LP64__
+ v->cnt = new_value;
+#else
+ int success = 0;
+ uint64_t tmp;
+
+ while (success == 0) {
+ tmp = v->cnt;
+ success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt,
+ tmp, new_value);
+ }
+#endif
+}
+#endif
+
+/**
+ * Atomically add a 64-bit value to a counter.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ * @param inc
+ * The value to be added to the counter.
+ */
+static inline void
+rte_atomic64_add(rte_atomic64_t *v, int64_t inc);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline void
+rte_atomic64_add(rte_atomic64_t *v, int64_t inc)
+{
+ __sync_fetch_and_add(&v->cnt, inc);
+}
+#endif
+
+/**
+ * Atomically subtract a 64-bit value from a counter.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ * @param dec
+ * The value to be subtracted from the counter.
+ */
+static inline void
+rte_atomic64_sub(rte_atomic64_t *v, int64_t dec);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline void
+rte_atomic64_sub(rte_atomic64_t *v, int64_t dec)
+{
+ __sync_fetch_and_sub(&v->cnt, dec);
+}
+#endif
+
+/**
+ * Atomically increment a 64-bit counter by one and test.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ */
+static inline void
+rte_atomic64_inc(rte_atomic64_t *v);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline void
+rte_atomic64_inc(rte_atomic64_t *v)
+{
+ rte_atomic64_add(v, 1);
+}
+#endif
+
+/**
+ * Atomically decrement a 64-bit counter by one and test.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ */
+static inline void
+rte_atomic64_dec(rte_atomic64_t *v);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline void
+rte_atomic64_dec(rte_atomic64_t *v)
+{
+ rte_atomic64_sub(v, 1);
+}
+#endif
+
+/**
+ * Add a 64-bit value to an atomic counter and return the result.
+ *
+ * Atomically adds the 64-bit value (inc) to the atomic counter (v) and
+ * returns the value of v after the addition.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ * @param inc
+ * The value to be added to the counter.
+ * @return
+ * The value of v after the addition.
+ */
+static inline int64_t
+rte_atomic64_add_return(rte_atomic64_t *v, int64_t inc);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline int64_t
+rte_atomic64_add_return(rte_atomic64_t *v, int64_t inc)
+{
+ return __sync_add_and_fetch(&v->cnt, inc);
+}
+#endif
+
+/**
+ * Subtract a 64-bit value from an atomic counter and return the result.
+ *
+ * Atomically subtracts the 64-bit value (dec) from the atomic counter (v)
+ * and returns the value of v after the subtraction.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ * @param dec
+ * The value to be subtracted from the counter.
+ * @return
+ * The value of v after the subtraction.
+ */
+static inline int64_t
+rte_atomic64_sub_return(rte_atomic64_t *v, int64_t dec);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline int64_t
+rte_atomic64_sub_return(rte_atomic64_t *v, int64_t dec)
+{
+ return __sync_sub_and_fetch(&v->cnt, dec);
+}
+#endif
+
+/**
+ * Atomically increment a 64-bit counter by one and test.
+ *
+ * Atomically increments the atomic counter (v) by one and returns
+ * true if the result is 0, or false in all other cases.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ * @return
+ * True if the result after the addition is 0; false otherwise.
+ */
+static inline int rte_atomic64_inc_and_test(rte_atomic64_t *v);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline int rte_atomic64_inc_and_test(rte_atomic64_t *v)
+{
+ return rte_atomic64_add_return(v, 1) == 0;
+}
+#endif
+
+/**
+ * Atomically decrement a 64-bit counter by one and test.
+ *
+ * Atomically decrements the atomic counter (v) by one and returns true if
+ * the result is 0, or false in all other cases.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ * @return
+ * True if the result after subtraction is 0; false otherwise.
+ */
+static inline int rte_atomic64_dec_and_test(rte_atomic64_t *v);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline int rte_atomic64_dec_and_test(rte_atomic64_t *v)
+{
+ return rte_atomic64_sub_return(v, 1) == 0;
+}
+#endif
+
+/**
+ * Atomically test and set a 64-bit atomic counter.
+ *
+ * If the counter value is already set, return 0 (failed). Otherwise, set
+ * the counter value to 1 and return 1 (success).
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ * @return
+ * 0 if failed; else 1, success.
+ */
+static inline int rte_atomic64_test_and_set(rte_atomic64_t *v);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline int rte_atomic64_test_and_set(rte_atomic64_t *v)
+{
+ return rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, 0, 1);
+}
+#endif
+
+/**
+ * Atomically set a 64-bit counter to 0.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ */
+static inline void rte_atomic64_clear(rte_atomic64_t *v);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline void rte_atomic64_clear(rte_atomic64_t *v)
+{
+ rte_atomic64_set(v, 0);
+}
+#endif
+
+#endif /* _RTE_ATOMIC_H_ */
diff --git a/lib/librte_eal/common/include/generic/rte_byteorder.h b/lib/librte_eal/common/include/generic/rte_byteorder.h
new file mode 100644
index 00000000..c46fdcf2
--- /dev/null
+++ b/lib/librte_eal/common/include/generic/rte_byteorder.h
@@ -0,0 +1,217 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_BYTEORDER_H_
+#define _RTE_BYTEORDER_H_
+
+/**
+ * @file
+ *
+ * Byte Swap Operations
+ *
+ * This file defines a generic API for byte swap operations. Part of
+ * the implementation is architecture-specific.
+ */
+
+#include <stdint.h>
+#ifdef RTE_EXEC_ENV_BSDAPP
+#include <sys/endian.h>
+#else
+#include <endian.h>
+#endif
+
+/*
+ * Compile-time endianness detection
+ */
+#define RTE_BIG_ENDIAN 1
+#define RTE_LITTLE_ENDIAN 2
+#if defined __BYTE_ORDER__
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define RTE_BYTE_ORDER RTE_BIG_ENDIAN
+#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define RTE_BYTE_ORDER RTE_LITTLE_ENDIAN
+#endif /* __BYTE_ORDER__ */
+#elif defined __BYTE_ORDER
+#if __BYTE_ORDER == __BIG_ENDIAN
+#define RTE_BYTE_ORDER RTE_BIG_ENDIAN
+#elif __BYTE_ORDER == __LITTLE_ENDIAN
+#define RTE_BYTE_ORDER RTE_LITTLE_ENDIAN
+#endif /* __BYTE_ORDER */
+#elif defined __BIG_ENDIAN__
+#define RTE_BYTE_ORDER RTE_BIG_ENDIAN
+#elif defined __LITTLE_ENDIAN__
+#define RTE_BYTE_ORDER RTE_LITTLE_ENDIAN
+#endif
+
+/*
+ * An internal function to swap bytes in a 16-bit value.
+ *
+ * It is used by rte_bswap16() when the value is constant. Do not use
+ * this function directly; rte_bswap16() is preferred.
+ */
+static inline uint16_t
+rte_constant_bswap16(uint16_t x)
+{
+ return (uint16_t)(((x & 0x00ffU) << 8) |
+ ((x & 0xff00U) >> 8));
+}
+
+/*
+ * An internal function to swap bytes in a 32-bit value.
+ *
+ * It is used by rte_bswap32() when the value is constant. Do not use
+ * this function directly; rte_bswap32() is preferred.
+ */
+static inline uint32_t
+rte_constant_bswap32(uint32_t x)
+{
+ return ((x & 0x000000ffUL) << 24) |
+ ((x & 0x0000ff00UL) << 8) |
+ ((x & 0x00ff0000UL) >> 8) |
+ ((x & 0xff000000UL) >> 24);
+}
+
+/*
+ * An internal function to swap bytes of a 64-bit value.
+ *
+ * It is used by rte_bswap64() when the value is constant. Do not use
+ * this function directly; rte_bswap64() is preferred.
+ */
+static inline uint64_t
+rte_constant_bswap64(uint64_t x)
+{
+ return ((x & 0x00000000000000ffULL) << 56) |
+ ((x & 0x000000000000ff00ULL) << 40) |
+ ((x & 0x0000000000ff0000ULL) << 24) |
+ ((x & 0x00000000ff000000ULL) << 8) |
+ ((x & 0x000000ff00000000ULL) >> 8) |
+ ((x & 0x0000ff0000000000ULL) >> 24) |
+ ((x & 0x00ff000000000000ULL) >> 40) |
+ ((x & 0xff00000000000000ULL) >> 56);
+}
+
+
+#ifdef __DOXYGEN__
+
+/**
+ * Swap bytes in a 16-bit value.
+ */
+static uint16_t rte_bswap16(uint16_t _x);
+
+/**
+ * Swap bytes in a 32-bit value.
+ */
+static uint32_t rte_bswap32(uint32_t x);
+
+/**
+ * Swap bytes in a 64-bit value.
+ */
+static uint64_t rte_bswap64(uint64_t x);
+
+/**
+ * Convert a 16-bit value from CPU order to little endian.
+ */
+static uint16_t rte_cpu_to_le_16(uint16_t x);
+
+/**
+ * Convert a 32-bit value from CPU order to little endian.
+ */
+static uint32_t rte_cpu_to_le_32(uint32_t x);
+
+/**
+ * Convert a 64-bit value from CPU order to little endian.
+ */
+static uint64_t rte_cpu_to_le_64(uint64_t x);
+
+
+/**
+ * Convert a 16-bit value from CPU order to big endian.
+ */
+static uint16_t rte_cpu_to_be_16(uint16_t x);
+
+/**
+ * Convert a 32-bit value from CPU order to big endian.
+ */
+static uint32_t rte_cpu_to_be_32(uint32_t x);
+
+/**
+ * Convert a 64-bit value from CPU order to big endian.
+ */
+static uint64_t rte_cpu_to_be_64(uint64_t x);
+
+
+/**
+ * Convert a 16-bit value from little endian to CPU order.
+ */
+static uint16_t rte_le_to_cpu_16(uint16_t x);
+
+/**
+ * Convert a 32-bit value from little endian to CPU order.
+ */
+static uint32_t rte_le_to_cpu_32(uint32_t x);
+
+/**
+ * Convert a 64-bit value from little endian to CPU order.
+ */
+static uint64_t rte_le_to_cpu_64(uint64_t x);
+
+
+/**
+ * Convert a 16-bit value from big endian to CPU order.
+ */
+static uint16_t rte_be_to_cpu_16(uint16_t x);
+
+/**
+ * Convert a 32-bit value from big endian to CPU order.
+ */
+static uint32_t rte_be_to_cpu_32(uint32_t x);
+
+/**
+ * Convert a 64-bit value from big endian to CPU order.
+ */
+static uint64_t rte_be_to_cpu_64(uint64_t x);
+
+#endif /* __DOXYGEN__ */
+
+#ifdef RTE_FORCE_INTRINSICS
+#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)
+#define rte_bswap16(x) __builtin_bswap16(x)
+#endif
+
+#define rte_bswap32(x) __builtin_bswap32(x)
+
+#define rte_bswap64(x) __builtin_bswap64(x)
+
+#endif
+
+#endif /* _RTE_BYTEORDER_H_ */
diff --git a/lib/librte_eal/common/include/generic/rte_cpuflags.h b/lib/librte_eal/common/include/generic/rte_cpuflags.h
new file mode 100644
index 00000000..c1da357c
--- /dev/null
+++ b/lib/librte_eal/common/include/generic/rte_cpuflags.h
@@ -0,0 +1,82 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_CPUFLAGS_H_
+#define _RTE_CPUFLAGS_H_
+
+/**
+ * @file
+ * Architecture specific API to determine available CPU features at runtime.
+ */
+
+#include <errno.h>
+
+/**
+ * Enumeration of all CPU features supported
+ */
+enum rte_cpu_flag_t;
+
+/**
+ * Get name of CPU flag
+ *
+ * @param feature
+ * CPU flag ID
+ * @return
+ * flag name
+ * NULL if flag ID is invalid
+ */
+const char *
+rte_cpu_get_flag_name(enum rte_cpu_flag_t feature);
+
+/**
+ * Function for checking a CPU flag availability
+ *
+ * @param feature
+ * CPU flag to query CPU for
+ * @return
+ * 1 if flag is available
+ * 0 if flag is not available
+ * -ENOENT if flag is invalid
+ */
+int
+rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature);
+
+/**
+ * This function checks that the currently used CPU supports the CPU features
+ * that were specified at compile time. It is called automatically within the
+ * EAL, so does not need to be used by applications.
+ */
+void
+rte_cpu_check_supported(void);
+
+#endif /* _RTE_CPUFLAGS_H_ */
diff --git a/lib/librte_eal/common/include/generic/rte_cycles.h b/lib/librte_eal/common/include/generic/rte_cycles.h
new file mode 100644
index 00000000..8cc21f20
--- /dev/null
+++ b/lib/librte_eal/common/include/generic/rte_cycles.h
@@ -0,0 +1,205 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+/* BSD LICENSE
+ *
+ * Copyright(c) 2013 6WIND.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_CYCLES_H_
+#define _RTE_CYCLES_H_
+
+/**
+ * @file
+ *
+ * Simple Time Reference Functions (Cycles and HPET).
+ */
+
+#include <stdint.h>
+#include <rte_debug.h>
+#include <rte_atomic.h>
+
+#define MS_PER_S 1000
+#define US_PER_S 1000000
+#define NS_PER_S 1000000000
+
+enum timer_source {
+ EAL_TIMER_TSC = 0,
+ EAL_TIMER_HPET
+};
+extern enum timer_source eal_timer_source;
+
+/**
+ * Get the measured frequency of the RDTSC counter
+ *
+ * @return
+ * The TSC frequency for this lcore
+ */
+uint64_t
+rte_get_tsc_hz(void);
+
+/**
+ * Return the number of TSC cycles since boot
+ *
+ * @return
+ * the number of cycles
+ */
+static inline uint64_t
+rte_get_tsc_cycles(void);
+
+#ifdef RTE_LIBEAL_USE_HPET
+/**
+ * Return the number of HPET cycles since boot
+ *
+ * This counter is global for all execution units. The number of
+ * cycles in one second can be retrieved using rte_get_hpet_hz().
+ *
+ * @return
+ * the number of cycles
+ */
+uint64_t
+rte_get_hpet_cycles(void);
+
+/**
+ * Get the number of HPET cycles in one second.
+ *
+ * @return
+ * The number of cycles in one second.
+ */
+uint64_t
+rte_get_hpet_hz(void);
+
+/**
+ * Initialise the HPET for use. This must be called before the rte_get_hpet_hz
+ * and rte_get_hpet_cycles APIs are called. If this function does not succeed,
+ * then the HPET functions are unavailable and should not be called.
+ *
+ * @param make_default
+ * If set, the hpet timer becomes the default timer whose values are
+ * returned by the rte_get_timer_hz/cycles API calls
+ *
+ * @return
+ * 0 on success,
+ * -1 on error, and the make_default parameter is ignored.
+ */
+int rte_eal_hpet_init(int make_default);
+
+#endif
+
+/**
+ * Get the number of cycles since boot from the default timer.
+ *
+ * @return
+ * The number of cycles
+ */
+static inline uint64_t
+rte_get_timer_cycles(void)
+{
+ switch(eal_timer_source) {
+ case EAL_TIMER_TSC:
+ return rte_get_tsc_cycles();
+ case EAL_TIMER_HPET:
+#ifdef RTE_LIBEAL_USE_HPET
+ return rte_get_hpet_cycles();
+#endif
+ default: rte_panic("Invalid timer source specified\n");
+ }
+}
+
+/**
+ * Get the number of cycles in one second for the default timer.
+ *
+ * @return
+ * The number of cycles in one second.
+ */
+static inline uint64_t
+rte_get_timer_hz(void)
+{
+ switch(eal_timer_source) {
+ case EAL_TIMER_TSC:
+ return rte_get_tsc_hz();
+ case EAL_TIMER_HPET:
+#ifdef RTE_LIBEAL_USE_HPET
+ return rte_get_hpet_hz();
+#endif
+ default: rte_panic("Invalid timer source specified\n");
+ }
+}
+
+/**
+ * Wait at least us microseconds.
+ *
+ * @param us
+ * The number of microseconds to wait.
+ */
+void
+rte_delay_us(unsigned us);
+
+/**
+ * Wait at least ms milliseconds.
+ *
+ * @param ms
+ * The number of milliseconds to wait.
+ */
+static inline void
+rte_delay_ms(unsigned ms)
+{
+ rte_delay_us(ms * 1000);
+}
+
+#endif /* _RTE_CYCLES_H_ */
diff --git a/lib/librte_eal/common/include/generic/rte_memcpy.h b/lib/librte_eal/common/include/generic/rte_memcpy.h
new file mode 100644
index 00000000..03e84773
--- /dev/null
+++ b/lib/librte_eal/common/include/generic/rte_memcpy.h
@@ -0,0 +1,144 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_MEMCPY_H_
+#define _RTE_MEMCPY_H_
+
+/**
+ * @file
+ *
+ * Functions for vectorised implementation of memcpy().
+ */
+
+/**
+ * Copy 16 bytes from one location to another using optimised
+ * instructions. The locations should not overlap.
+ *
+ * @param dst
+ * Pointer to the destination of the data.
+ * @param src
+ * Pointer to the source data.
+ */
+static inline void
+rte_mov16(uint8_t *dst, const uint8_t *src);
+
+/**
+ * Copy 32 bytes from one location to another using optimised
+ * instructions. The locations should not overlap.
+ *
+ * @param dst
+ * Pointer to the destination of the data.
+ * @param src
+ * Pointer to the source data.
+ */
+static inline void
+rte_mov32(uint8_t *dst, const uint8_t *src);
+
+/**
+ * Copy 48 bytes from one location to another using optimised
+ * instructions. The locations should not overlap.
+ *
+ * @param dst
+ * Pointer to the destination of the data.
+ * @param src
+ * Pointer to the source data.
+ */
+static inline void
+rte_mov48(uint8_t *dst, const uint8_t *src);
+
+/**
+ * Copy 64 bytes from one location to another using optimised
+ * instructions. The locations should not overlap.
+ *
+ * @param dst
+ * Pointer to the destination of the data.
+ * @param src
+ * Pointer to the source data.
+ */
+static inline void
+rte_mov64(uint8_t *dst, const uint8_t *src);
+
+/**
+ * Copy 128 bytes from one location to another using optimised
+ * instructions. The locations should not overlap.
+ *
+ * @param dst
+ * Pointer to the destination of the data.
+ * @param src
+ * Pointer to the source data.
+ */
+static inline void
+rte_mov128(uint8_t *dst, const uint8_t *src);
+
+/**
+ * Copy 256 bytes from one location to another using optimised
+ * instructions. The locations should not overlap.
+ *
+ * @param dst
+ * Pointer to the destination of the data.
+ * @param src
+ * Pointer to the source data.
+ */
+static inline void
+rte_mov256(uint8_t *dst, const uint8_t *src);
+
+#ifdef __DOXYGEN__
+
+/**
+ * Copy bytes from one location to another. The locations must not overlap.
+ *
+ * @note This is implemented as a macro, so it's address should not be taken
+ * and care is needed as parameter expressions may be evaluated multiple times.
+ *
+ * @param dst
+ * Pointer to the destination of the data.
+ * @param src
+ * Pointer to the source data.
+ * @param n
+ * Number of bytes to copy.
+ * @return
+ * Pointer to the destination data.
+ */
+static void *
+rte_memcpy(void *dst, const void *src, size_t n);
+
+#endif /* __DOXYGEN__ */
+
+/*
+ * memcpy() function used by rte_memcpy macro
+ */
+static inline void *
+rte_memcpy_func(void *dst, const void *src, size_t n) __attribute__((always_inline));
+
+
+#endif /* _RTE_MEMCPY_H_ */
diff --git a/lib/librte_eal/common/include/generic/rte_prefetch.h b/lib/librte_eal/common/include/generic/rte_prefetch.h
new file mode 100644
index 00000000..07e409ec
--- /dev/null
+++ b/lib/librte_eal/common/include/generic/rte_prefetch.h
@@ -0,0 +1,83 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_PREFETCH_H_
+#define _RTE_PREFETCH_H_
+
+/**
+ * @file
+ *
+ * Prefetch operations.
+ *
+ * This file defines an API for prefetch macros / inline-functions,
+ * which are architecture-dependent. Prefetching occurs when a
+ * processor requests an instruction or data from memory to cache
+ * before it is actually needed, potentially speeding up the execution of the
+ * program.
+ */
+
+/**
+ * Prefetch a cache line into all cache levels.
+ * @param p
+ * Address to prefetch
+ */
+static inline void rte_prefetch0(const volatile void *p);
+
+/**
+ * Prefetch a cache line into all cache levels except the 0th cache level.
+ * @param p
+ * Address to prefetch
+ */
+static inline void rte_prefetch1(const volatile void *p);
+
+/**
+ * Prefetch a cache line into all cache levels except the 0th and 1th cache
+ * levels.
+ * @param p
+ * Address to prefetch
+ */
+static inline void rte_prefetch2(const volatile void *p);
+
+/**
+ * Prefetch a cache line into all cache levels (non-temporal/transient version)
+ *
+ * The non-temporal prefetch is intended as a prefetch hint that processor will
+ * use the prefetched data only once or short period, unlike the
+ * rte_prefetch0() function which imply that prefetched data to use repeatedly.
+ *
+ * @param p
+ * Address to prefetch
+ */
+static inline void rte_prefetch_non_temporal(const volatile void *p);
+
+#endif /* _RTE_PREFETCH_H_ */
diff --git a/lib/librte_eal/common/include/generic/rte_rwlock.h b/lib/librte_eal/common/include/generic/rte_rwlock.h
new file mode 100644
index 00000000..7a0fdc55
--- /dev/null
+++ b/lib/librte_eal/common/include/generic/rte_rwlock.h
@@ -0,0 +1,208 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_RWLOCK_H_
+#define _RTE_RWLOCK_H_
+
+/**
+ * @file
+ *
+ * RTE Read-Write Locks
+ *
+ * This file defines an API for read-write locks. The lock is used to
+ * protect data that allows multiple readers in parallel, but only
+ * one writer. All readers are blocked until the writer is finished
+ * writing.
+ *
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_common.h>
+#include <rte_atomic.h>
+
+/**
+ * The rte_rwlock_t type.
+ *
+ * cnt is -1 when write lock is held, and > 0 when read locks are held.
+ */
+typedef struct {
+ volatile int32_t cnt; /**< -1 when W lock held, > 0 when R locks held. */
+} rte_rwlock_t;
+
+/**
+ * A static rwlock initializer.
+ */
+#define RTE_RWLOCK_INITIALIZER { 0 }
+
+/**
+ * Initialize the rwlock to an unlocked state.
+ *
+ * @param rwl
+ * A pointer to the rwlock structure.
+ */
+static inline void
+rte_rwlock_init(rte_rwlock_t *rwl)
+{
+ rwl->cnt = 0;
+}
+
+/**
+ * Take a read lock. Loop until the lock is held.
+ *
+ * @param rwl
+ * A pointer to a rwlock structure.
+ */
+static inline void
+rte_rwlock_read_lock(rte_rwlock_t *rwl)
+{
+ int32_t x;
+ int success = 0;
+
+ while (success == 0) {
+ x = rwl->cnt;
+ /* write lock is held */
+ if (x < 0) {
+ rte_pause();
+ continue;
+ }
+ success = rte_atomic32_cmpset((volatile uint32_t *)&rwl->cnt,
+ x, x + 1);
+ }
+}
+
+/**
+ * Release a read lock.
+ *
+ * @param rwl
+ * A pointer to the rwlock structure.
+ */
+static inline void
+rte_rwlock_read_unlock(rte_rwlock_t *rwl)
+{
+ rte_atomic32_dec((rte_atomic32_t *)(intptr_t)&rwl->cnt);
+}
+
+/**
+ * Take a write lock. Loop until the lock is held.
+ *
+ * @param rwl
+ * A pointer to a rwlock structure.
+ */
+static inline void
+rte_rwlock_write_lock(rte_rwlock_t *rwl)
+{
+ int32_t x;
+ int success = 0;
+
+ while (success == 0) {
+ x = rwl->cnt;
+ /* a lock is held */
+ if (x != 0) {
+ rte_pause();
+ continue;
+ }
+ success = rte_atomic32_cmpset((volatile uint32_t *)&rwl->cnt,
+ 0, -1);
+ }
+}
+
+/**
+ * Release a write lock.
+ *
+ * @param rwl
+ * A pointer to a rwlock structure.
+ */
+static inline void
+rte_rwlock_write_unlock(rte_rwlock_t *rwl)
+{
+ rte_atomic32_inc((rte_atomic32_t *)(intptr_t)&rwl->cnt);
+}
+
+/**
+ * Try to execute critical section in a hardware memory transaction, if it
+ * fails or not available take a read lock
+ *
+ * NOTE: An attempt to perform a HW I/O operation inside a hardware memory
+ * transaction always aborts the transaction since the CPU is not able to
+ * roll-back should the transaction fail. Therefore, hardware transactional
+ * locks are not advised to be used around rte_eth_rx_burst() and
+ * rte_eth_tx_burst() calls.
+ *
+ * @param rwl
+ * A pointer to a rwlock structure.
+ */
+static inline void
+rte_rwlock_read_lock_tm(rte_rwlock_t *rwl);
+
+/**
+ * Commit hardware memory transaction or release the read lock if the lock is used as a fall-back
+ *
+ * @param rwl
+ * A pointer to the rwlock structure.
+ */
+static inline void
+rte_rwlock_read_unlock_tm(rte_rwlock_t *rwl);
+
+/**
+ * Try to execute critical section in a hardware memory transaction, if it
+ * fails or not available take a write lock
+ *
+ * NOTE: An attempt to perform a HW I/O operation inside a hardware memory
+ * transaction always aborts the transaction since the CPU is not able to
+ * roll-back should the transaction fail. Therefore, hardware transactional
+ * locks are not advised to be used around rte_eth_rx_burst() and
+ * rte_eth_tx_burst() calls.
+ *
+ * @param rwl
+ * A pointer to a rwlock structure.
+ */
+static inline void
+rte_rwlock_write_lock_tm(rte_rwlock_t *rwl);
+
+/**
+ * Commit hardware memory transaction or release the write lock if the lock is used as a fall-back
+ *
+ * @param rwl
+ * A pointer to a rwlock structure.
+ */
+static inline void
+rte_rwlock_write_unlock_tm(rte_rwlock_t *rwl);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_RWLOCK_H_ */
diff --git a/lib/librte_eal/common/include/generic/rte_spinlock.h b/lib/librte_eal/common/include/generic/rte_spinlock.h
new file mode 100644
index 00000000..e51fc56b
--- /dev/null
+++ b/lib/librte_eal/common/include/generic/rte_spinlock.h
@@ -0,0 +1,325 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_SPINLOCK_H_
+#define _RTE_SPINLOCK_H_
+
+/**
+ * @file
+ *
+ * RTE Spinlocks
+ *
+ * This file defines an API for read-write locks, which are implemented
+ * in an architecture-specific way. This kind of lock simply waits in
+ * a loop repeatedly checking until the lock becomes available.
+ *
+ * All locks must be initialised before use, and only initialised once.
+ *
+ */
+
+#include <rte_lcore.h>
+#ifdef RTE_FORCE_INTRINSICS
+#include <rte_common.h>
+#endif
+
+/**
+ * The rte_spinlock_t type.
+ */
+typedef struct {
+ volatile int locked; /**< lock status 0 = unlocked, 1 = locked */
+} rte_spinlock_t;
+
+/**
+ * A static spinlock initializer.
+ */
+#define RTE_SPINLOCK_INITIALIZER { 0 }
+
+/**
+ * Initialize the spinlock to an unlocked state.
+ *
+ * @param sl
+ * A pointer to the spinlock.
+ */
+static inline void
+rte_spinlock_init(rte_spinlock_t *sl)
+{
+ sl->locked = 0;
+}
+
+/**
+ * Take the spinlock.
+ *
+ * @param sl
+ * A pointer to the spinlock.
+ */
+static inline void
+rte_spinlock_lock(rte_spinlock_t *sl);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline void
+rte_spinlock_lock(rte_spinlock_t *sl)
+{
+ while (__sync_lock_test_and_set(&sl->locked, 1))
+ while(sl->locked)
+ rte_pause();
+}
+#endif
+
+/**
+ * Release the spinlock.
+ *
+ * @param sl
+ * A pointer to the spinlock.
+ */
+static inline void
+rte_spinlock_unlock (rte_spinlock_t *sl);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline void
+rte_spinlock_unlock (rte_spinlock_t *sl)
+{
+ __sync_lock_release(&sl->locked);
+}
+#endif
+
+/**
+ * Try to take the lock.
+ *
+ * @param sl
+ * A pointer to the spinlock.
+ * @return
+ * 1 if the lock is successfully taken; 0 otherwise.
+ */
+static inline int
+rte_spinlock_trylock (rte_spinlock_t *sl);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline int
+rte_spinlock_trylock (rte_spinlock_t *sl)
+{
+ return __sync_lock_test_and_set(&sl->locked,1) == 0;
+}
+#endif
+
+/**
+ * Test if the lock is taken.
+ *
+ * @param sl
+ * A pointer to the spinlock.
+ * @return
+ * 1 if the lock is currently taken; 0 otherwise.
+ */
+static inline int rte_spinlock_is_locked (rte_spinlock_t *sl)
+{
+ return sl->locked;
+}
+
+/**
+ * Test if hardware transactional memory (lock elision) is supported
+ *
+ * @return
+ * 1 if the hardware transactional memory is supported; 0 otherwise.
+ */
+static inline int rte_tm_supported(void);
+
+/**
+ * Try to execute critical section in a hardware memory transaction,
+ * if it fails or not available take the spinlock.
+ *
+ * NOTE: An attempt to perform a HW I/O operation inside a hardware memory
+ * transaction always aborts the transaction since the CPU is not able to
+ * roll-back should the transaction fail. Therefore, hardware transactional
+ * locks are not advised to be used around rte_eth_rx_burst() and
+ * rte_eth_tx_burst() calls.
+ *
+ * @param sl
+ * A pointer to the spinlock.
+ */
+static inline void
+rte_spinlock_lock_tm(rte_spinlock_t *sl);
+
+/**
+ * Commit hardware memory transaction or release the spinlock if
+ * the spinlock is used as a fall-back
+ *
+ * @param sl
+ * A pointer to the spinlock.
+ */
+static inline void
+rte_spinlock_unlock_tm(rte_spinlock_t *sl);
+
+/**
+ * Try to execute critical section in a hardware memory transaction,
+ * if it fails or not available try to take the lock.
+ *
+ * NOTE: An attempt to perform a HW I/O operation inside a hardware memory
+ * transaction always aborts the transaction since the CPU is not able to
+ * roll-back should the transaction fail. Therefore, hardware transactional
+ * locks are not advised to be used around rte_eth_rx_burst() and
+ * rte_eth_tx_burst() calls.
+ *
+ * @param sl
+ * A pointer to the spinlock.
+ * @return
+ * 1 if the hardware memory transaction is successfully started
+ * or lock is successfully taken; 0 otherwise.
+ */
+static inline int
+rte_spinlock_trylock_tm(rte_spinlock_t *sl);
+
+/**
+ * The rte_spinlock_recursive_t type.
+ */
+typedef struct {
+ rte_spinlock_t sl; /**< the actual spinlock */
+ volatile int user; /**< core id using lock, -1 for unused */
+ volatile int count; /**< count of time this lock has been called */
+} rte_spinlock_recursive_t;
+
+/**
+ * A static recursive spinlock initializer.
+ */
+#define RTE_SPINLOCK_RECURSIVE_INITIALIZER {RTE_SPINLOCK_INITIALIZER, -1, 0}
+
+/**
+ * Initialize the recursive spinlock to an unlocked state.
+ *
+ * @param slr
+ * A pointer to the recursive spinlock.
+ */
+static inline void rte_spinlock_recursive_init(rte_spinlock_recursive_t *slr)
+{
+ rte_spinlock_init(&slr->sl);
+ slr->user = -1;
+ slr->count = 0;
+}
+
+/**
+ * Take the recursive spinlock.
+ *
+ * @param slr
+ * A pointer to the recursive spinlock.
+ */
+static inline void rte_spinlock_recursive_lock(rte_spinlock_recursive_t *slr)
+{
+ int id = rte_gettid();
+
+ if (slr->user != id) {
+ rte_spinlock_lock(&slr->sl);
+ slr->user = id;
+ }
+ slr->count++;
+}
+/**
+ * Release the recursive spinlock.
+ *
+ * @param slr
+ * A pointer to the recursive spinlock.
+ */
+static inline void rte_spinlock_recursive_unlock(rte_spinlock_recursive_t *slr)
+{
+ if (--(slr->count) == 0) {
+ slr->user = -1;
+ rte_spinlock_unlock(&slr->sl);
+ }
+
+}
+
+/**
+ * Try to take the recursive lock.
+ *
+ * @param slr
+ * A pointer to the recursive spinlock.
+ * @return
+ * 1 if the lock is successfully taken; 0 otherwise.
+ */
+static inline int rte_spinlock_recursive_trylock(rte_spinlock_recursive_t *slr)
+{
+ int id = rte_gettid();
+
+ if (slr->user != id) {
+ if (rte_spinlock_trylock(&slr->sl) == 0)
+ return 0;
+ slr->user = id;
+ }
+ slr->count++;
+ return 1;
+}
+
+
+/**
+ * Try to execute critical section in a hardware memory transaction,
+ * if it fails or not available take the recursive spinlocks
+ *
+ * NOTE: An attempt to perform a HW I/O operation inside a hardware memory
+ * transaction always aborts the transaction since the CPU is not able to
+ * roll-back should the transaction fail. Therefore, hardware transactional
+ * locks are not advised to be used around rte_eth_rx_burst() and
+ * rte_eth_tx_burst() calls.
+ *
+ * @param slr
+ * A pointer to the recursive spinlock.
+ */
+static inline void rte_spinlock_recursive_lock_tm(
+ rte_spinlock_recursive_t *slr);
+
+/**
+ * Commit hardware memory transaction or release the recursive spinlock
+ * if the recursive spinlock is used as a fall-back
+ *
+ * @param slr
+ * A pointer to the recursive spinlock.
+ */
+static inline void rte_spinlock_recursive_unlock_tm(
+ rte_spinlock_recursive_t *slr);
+
+/**
+ * Try to execute critical section in a hardware memory transaction,
+ * if it fails or not available try to take the recursive lock
+ *
+ * NOTE: An attempt to perform a HW I/O operation inside a hardware memory
+ * transaction always aborts the transaction since the CPU is not able to
+ * roll-back should the transaction fail. Therefore, hardware transactional
+ * locks are not advised to be used around rte_eth_rx_burst() and
+ * rte_eth_tx_burst() calls.
+ *
+ * @param slr
+ * A pointer to the recursive spinlock.
+ * @return
+ * 1 if the hardware memory transaction is successfully started
+ * or lock is successfully taken; 0 otherwise.
+ */
+static inline int rte_spinlock_recursive_trylock_tm(
+ rte_spinlock_recursive_t *slr);
+
+#endif /* _RTE_SPINLOCK_H_ */
diff --git a/lib/librte_eal/common/include/rte_alarm.h b/lib/librte_eal/common/include/rte_alarm.h
new file mode 100644
index 00000000..4012cd67
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_alarm.h
@@ -0,0 +1,106 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_ALARM_H_
+#define _RTE_ALARM_H_
+
+/**
+ * @file
+ *
+ * Alarm functions
+ *
+ * Simple alarm-clock functionality supplied by eal.
+ * Does not require hpet support.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+/**
+ * Signature of callback back function called when an alarm goes off.
+ */
+typedef void (*rte_eal_alarm_callback)(void *arg);
+
+/**
+ * Function to set a callback to be triggered when us microseconds
+ * have expired. Accuracy of timing to the microsecond is not guaranteed. The
+ * alarm function will not be called *before* the requested time, but may
+ * be called a short period of time afterwards.
+ * The alarm handler will be called only once. There is no need to call
+ * "rte_eal_alarm_cancel" from within the callback function.
+ *
+ * @param us
+ * The time in microseconds before the callback is called
+ * @param cb
+ * The function to be called when the alarm expires
+ * @param cb_arg
+ * Pointer parameter to be passed to the callback function
+ *
+ * @return
+ * On success, zero.
+ * On failure, a negative error number
+ */
+int rte_eal_alarm_set(uint64_t us, rte_eal_alarm_callback cb, void *cb_arg);
+
+/**
+ * Function to cancel an alarm callback which has been registered before. If
+ * used outside alarm callback it wait for all callbacks to finish execution.
+ *
+ * @param cb_fn
+ * alarm callback
+ * @param cb_arg
+ * Pointer parameter to be passed to the callback function. To remove all
+ * copies of a given callback function, irrespective of parameter, (void *)-1
+ * can be used here.
+ *
+ * @return
+ * - value greater than 0 and rte_errno not changed - returned value is
+ * the number of canceled alarm callback functions
+ * - value greater or equal 0 and rte_errno set to EINPROGRESS, at least one
+ * alarm could not be canceled because cancellation was requested from alarm
+ * callback context. Returned value is the number of succesfuly canceled
+ * alarm callbacks
+ * - 0 and rte_errno set to ENOENT - no alarm found
+ * - -1 and rte_errno set to EINVAL - invalid parameter (NULL callback)
+ */
+int rte_eal_alarm_cancel(rte_eal_alarm_callback cb_fn, void *cb_arg);
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* _RTE_ALARM_H_ */
diff --git a/lib/librte_eal/common/include/rte_branch_prediction.h b/lib/librte_eal/common/include/rte_branch_prediction.h
new file mode 100644
index 00000000..a6a56d17
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_branch_prediction.h
@@ -0,0 +1,70 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @file
+ * Branch Prediction Helpers in RTE
+ */
+
+#ifndef _RTE_BRANCH_PREDICTION_H_
+#define _RTE_BRANCH_PREDICTION_H_
+
+/**
+ * Check if a branch is likely to be taken.
+ *
+ * This compiler builtin allows the developer to indicate if a branch is
+ * likely to be taken. Example:
+ *
+ * if (likely(x > 1))
+ * do_stuff();
+ *
+ */
+#ifndef likely
+#define likely(x) __builtin_expect((x),1)
+#endif /* likely */
+
+/**
+ * Check if a branch is unlikely to be taken.
+ *
+ * This compiler builtin allows the developer to indicate if a branch is
+ * unlikely to be taken. Example:
+ *
+ * if (unlikely(x < 1))
+ * do_stuff();
+ *
+ */
+#ifndef unlikely
+#define unlikely(x) __builtin_expect((x),0)
+#endif /* unlikely */
+
+#endif /* _RTE_BRANCH_PREDICTION_H_ */
diff --git a/lib/librte_eal/common/include/rte_common.h b/lib/librte_eal/common/include/rte_common.h
new file mode 100644
index 00000000..332f2a43
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_common.h
@@ -0,0 +1,401 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_COMMON_H_
+#define _RTE_COMMON_H_
+
+/**
+ * @file
+ *
+ * Generic, commonly-used macro and inline function definitions
+ * for DPDK.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+
+#ifndef typeof
+#define typeof __typeof__
+#endif
+
+#ifndef asm
+#define asm __asm__
+#endif
+
+#ifdef RTE_ARCH_STRICT_ALIGN
+typedef uint64_t unaligned_uint64_t __attribute__ ((aligned(1)));
+typedef uint32_t unaligned_uint32_t __attribute__ ((aligned(1)));
+typedef uint16_t unaligned_uint16_t __attribute__ ((aligned(1)));
+#else
+typedef uint64_t unaligned_uint64_t;
+typedef uint32_t unaligned_uint32_t;
+typedef uint16_t unaligned_uint16_t;
+#endif
+
+/**
+ * Force alignment
+ */
+#define __rte_aligned(a) __attribute__((__aligned__(a)))
+
+/**
+ * Force a structure to be packed
+ */
+#define __rte_packed __attribute__((__packed__))
+
+/******* Macro to mark functions and fields scheduled for removal *****/
+#define __rte_deprecated __attribute__((__deprecated__))
+
+/*********** Macros to eliminate unused variable warnings ********/
+
+/**
+ * short definition to mark a function parameter unused
+ */
+#define __rte_unused __attribute__((__unused__))
+
+/**
+ * definition to mark a variable or function parameter as used so
+ * as to avoid a compiler warning
+ */
+#define RTE_SET_USED(x) (void)(x)
+
+/*********** Macros for pointer arithmetic ********/
+
+/**
+ * add a byte-value offset from a pointer
+ */
+#define RTE_PTR_ADD(ptr, x) ((void*)((uintptr_t)(ptr) + (x)))
+
+/**
+ * subtract a byte-value offset from a pointer
+ */
+#define RTE_PTR_SUB(ptr, x) ((void*)((uintptr_t)ptr - (x)))
+
+/**
+ * get the difference between two pointer values, i.e. how far apart
+ * in bytes are the locations they point two. It is assumed that
+ * ptr1 is greater than ptr2.
+ */
+#define RTE_PTR_DIFF(ptr1, ptr2) ((uintptr_t)(ptr1) - (uintptr_t)(ptr2))
+
+/*********** Macros/static functions for doing alignment ********/
+
+
+/**
+ * Macro to align a pointer to a given power-of-two. The resultant
+ * pointer will be a pointer of the same type as the first parameter, and
+ * point to an address no higher than the first parameter. Second parameter
+ * must be a power-of-two value.
+ */
+#define RTE_PTR_ALIGN_FLOOR(ptr, align) \
+ ((typeof(ptr))RTE_ALIGN_FLOOR((uintptr_t)ptr, align))
+
+/**
+ * Macro to align a value to a given power-of-two. The resultant value
+ * will be of the same type as the first parameter, and will be no
+ * bigger than the first parameter. Second parameter must be a
+ * power-of-two value.
+ */
+#define RTE_ALIGN_FLOOR(val, align) \
+ (typeof(val))((val) & (~((typeof(val))((align) - 1))))
+
+/**
+ * Macro to align a pointer to a given power-of-two. The resultant
+ * pointer will be a pointer of the same type as the first parameter, and
+ * point to an address no lower than the first parameter. Second parameter
+ * must be a power-of-two value.
+ */
+#define RTE_PTR_ALIGN_CEIL(ptr, align) \
+ RTE_PTR_ALIGN_FLOOR((typeof(ptr))RTE_PTR_ADD(ptr, (align) - 1), align)
+
+/**
+ * Macro to align a value to a given power-of-two. The resultant value
+ * will be of the same type as the first parameter, and will be no lower
+ * than the first parameter. Second parameter must be a power-of-two
+ * value.
+ */
+#define RTE_ALIGN_CEIL(val, align) \
+ RTE_ALIGN_FLOOR(((val) + ((typeof(val)) (align) - 1)), align)
+
+/**
+ * Macro to align a pointer to a given power-of-two. The resultant
+ * pointer will be a pointer of the same type as the first parameter, and
+ * point to an address no lower than the first parameter. Second parameter
+ * must be a power-of-two value.
+ * This function is the same as RTE_PTR_ALIGN_CEIL
+ */
+#define RTE_PTR_ALIGN(ptr, align) RTE_PTR_ALIGN_CEIL(ptr, align)
+
+/**
+ * Macro to align a value to a given power-of-two. The resultant
+ * value will be of the same type as the first parameter, and
+ * will be no lower than the first parameter. Second parameter
+ * must be a power-of-two value.
+ * This function is the same as RTE_ALIGN_CEIL
+ */
+#define RTE_ALIGN(val, align) RTE_ALIGN_CEIL(val, align)
+
+/**
+ * Checks if a pointer is aligned to a given power-of-two value
+ *
+ * @param ptr
+ * The pointer whose alignment is to be checked
+ * @param align
+ * The power-of-two value to which the ptr should be aligned
+ *
+ * @return
+ * True(1) where the pointer is correctly aligned, false(0) otherwise
+ */
+static inline int
+rte_is_aligned(void *ptr, unsigned align)
+{
+ return RTE_PTR_ALIGN(ptr, align) == ptr;
+}
+
+/*********** Macros for compile type checks ********/
+
+/**
+ * Triggers an error at compilation time if the condition is true.
+ */
+#ifndef __OPTIMIZE__
+#define RTE_BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
+#else
+extern int RTE_BUILD_BUG_ON_detected_error;
+#define RTE_BUILD_BUG_ON(condition) do { \
+ ((void)sizeof(char[1 - 2*!!(condition)])); \
+ if (condition) \
+ RTE_BUILD_BUG_ON_detected_error = 1; \
+} while(0)
+#endif
+
+/*********** Macros to work with powers of 2 ********/
+
+/**
+ * Returns true if n is a power of 2
+ * @param n
+ * Number to check
+ * @return 1 if true, 0 otherwise
+ */
+static inline int
+rte_is_power_of_2(uint32_t n)
+{
+ return n && !(n & (n - 1));
+}
+
+/**
+ * Aligns input parameter to the next power of 2
+ *
+ * @param x
+ * The integer value to algin
+ *
+ * @return
+ * Input parameter aligned to the next power of 2
+ */
+static inline uint32_t
+rte_align32pow2(uint32_t x)
+{
+ x--;
+ x |= x >> 1;
+ x |= x >> 2;
+ x |= x >> 4;
+ x |= x >> 8;
+ x |= x >> 16;
+
+ return x + 1;
+}
+
+/**
+ * Aligns 64b input parameter to the next power of 2
+ *
+ * @param v
+ * The 64b value to align
+ *
+ * @return
+ * Input parameter aligned to the next power of 2
+ */
+static inline uint64_t
+rte_align64pow2(uint64_t v)
+{
+ v--;
+ v |= v >> 1;
+ v |= v >> 2;
+ v |= v >> 4;
+ v |= v >> 8;
+ v |= v >> 16;
+ v |= v >> 32;
+
+ return v + 1;
+}
+
+/*********** Macros for calculating min and max **********/
+
+/**
+ * Macro to return the minimum of two numbers
+ */
+#define RTE_MIN(a, b) ({ \
+ typeof (a) _a = (a); \
+ typeof (b) _b = (b); \
+ _a < _b ? _a : _b; \
+ })
+
+/**
+ * Macro to return the maximum of two numbers
+ */
+#define RTE_MAX(a, b) ({ \
+ typeof (a) _a = (a); \
+ typeof (b) _b = (b); \
+ _a > _b ? _a : _b; \
+ })
+
+/*********** Other general functions / macros ********/
+
+#ifdef __SSE2__
+#include <emmintrin.h>
+/**
+ * PAUSE instruction for tight loops (avoid busy waiting)
+ */
+static inline void
+rte_pause (void)
+{
+ _mm_pause();
+}
+#else
+static inline void
+rte_pause(void) {}
+#endif
+
+/**
+ * Searches the input parameter for the least significant set bit
+ * (starting from zero).
+ * If a least significant 1 bit is found, its bit index is returned.
+ * If the content of the input parameter is zero, then the content of the return
+ * value is undefined.
+ * @param v
+ * input parameter, should not be zero.
+ * @return
+ * least significant set bit in the input parameter.
+ */
+static inline uint32_t
+rte_bsf32(uint32_t v)
+{
+ return __builtin_ctz(v);
+}
+
+#ifndef offsetof
+/** Return the offset of a field in a structure. */
+#define offsetof(TYPE, MEMBER) __builtin_offsetof (TYPE, MEMBER)
+#endif
+
+#define _RTE_STR(x) #x
+/** Take a macro value and get a string version of it */
+#define RTE_STR(x) _RTE_STR(x)
+
+/** Mask value of type "tp" for the first "ln" bit set. */
+#define RTE_LEN2MASK(ln, tp) \
+ ((tp)((uint64_t)-1 >> (sizeof(uint64_t) * CHAR_BIT - (ln))))
+
+/** Number of elements in the array. */
+#define RTE_DIM(a) (sizeof (a) / sizeof ((a)[0]))
+
+/**
+ * Converts a numeric string to the equivalent uint64_t value.
+ * As well as straight number conversion, also recognises the suffixes
+ * k, m and g for kilobytes, megabytes and gigabytes respectively.
+ *
+ * If a negative number is passed in i.e. a string with the first non-black
+ * character being "-", zero is returned. Zero is also returned in the case of
+ * an error with the strtoull call in the function.
+ *
+ * @param str
+ * String containing number to convert.
+ * @return
+ * Number.
+ */
+static inline uint64_t
+rte_str_to_size(const char *str)
+{
+ char *endptr;
+ unsigned long long size;
+
+ while (isspace((int)*str))
+ str++;
+ if (*str == '-')
+ return 0;
+
+ errno = 0;
+ size = strtoull(str, &endptr, 0);
+ if (errno)
+ return 0;
+
+ if (*endptr == ' ')
+ endptr++; /* allow 1 space gap */
+
+ switch (*endptr){
+ case 'G': case 'g': size *= 1024; /* fall-through */
+ case 'M': case 'm': size *= 1024; /* fall-through */
+ case 'K': case 'k': size *= 1024; /* fall-through */
+ default:
+ break;
+ }
+ return size;
+}
+
+/**
+ * Function to terminate the application immediately, printing an error
+ * message and returning the exit_code back to the shell.
+ *
+ * This function never returns
+ *
+ * @param exit_code
+ * The exit code to be returned by the application
+ * @param format
+ * The format string to be used for printing the message. This can include
+ * printf format characters which will be expanded using any further parameters
+ * to the function.
+ */
+void
+rte_exit(int exit_code, const char *format, ...)
+ __attribute__((noreturn))
+ __attribute__((format(printf, 2, 3)));
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/lib/librte_eal/common/include/rte_debug.h b/lib/librte_eal/common/include/rte_debug.h
new file mode 100644
index 00000000..94129fab
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_debug.h
@@ -0,0 +1,103 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_DEBUG_H_
+#define _RTE_DEBUG_H_
+
+/**
+ * @file
+ *
+ * Debug Functions in RTE
+ *
+ * This file defines a generic API for debug operations. Part of
+ * the implementation is architecture-specific.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Dump the stack of the calling core to the console.
+ */
+void rte_dump_stack(void);
+
+/**
+ * Dump the registers of the calling core to the console.
+ *
+ * Note: Not implemented in a userapp environment; use gdb instead.
+ */
+void rte_dump_registers(void);
+
+/**
+ * Provide notification of a critical non-recoverable error and terminate
+ * execution abnormally.
+ *
+ * Display the format string and its expanded arguments (printf-like).
+ *
+ * In a linuxapp environment, this function dumps the stack and calls
+ * abort() resulting in a core dump if enabled.
+ *
+ * The function never returns.
+ *
+ * @param ...
+ * The format string, followed by the variable list of arguments.
+ */
+#define rte_panic(...) rte_panic_(__func__, __VA_ARGS__, "dummy")
+#define rte_panic_(func, format, ...) __rte_panic(func, format "%.0s", __VA_ARGS__)
+
+#define RTE_VERIFY(exp) do { \
+ if (!(exp)) \
+ rte_panic("line %d\tassert \"" #exp "\" failed\n", __LINE__); \
+} while (0)
+
+/*
+ * Provide notification of a critical non-recoverable error and stop.
+ *
+ * This function should not be called directly. Refer to rte_panic() macro
+ * documentation.
+ */
+void __rte_panic(const char *funcname , const char *format, ...)
+#ifdef __GNUC__
+#if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 2))
+ __attribute__((cold))
+#endif
+#endif
+ __attribute__((noreturn))
+ __attribute__((format(printf, 2, 3)));
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_DEBUG_H_ */
diff --git a/lib/librte_eal/common/include/rte_dev.h b/lib/librte_eal/common/include/rte_dev.h
new file mode 100644
index 00000000..f1b55079
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_dev.h
@@ -0,0 +1,192 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2014 6WIND S.A.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_DEV_H_
+#define _RTE_DEV_H_
+
+/**
+ * @file
+ *
+ * RTE PMD Driver Registration Interface
+ *
+ * This file manages the list of device drivers.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdio.h>
+#include <sys/queue.h>
+
+#include <rte_log.h>
+
+__attribute__((format(printf, 2, 0)))
+static inline void
+rte_pmd_debug_trace(const char *func_name, const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+
+ char buffer[vsnprintf(NULL, 0, fmt, ap) + 1];
+
+ va_end(ap);
+
+ va_start(ap, fmt);
+ vsnprintf(buffer, sizeof(buffer), fmt, ap);
+ va_end(ap);
+
+ rte_log(RTE_LOG_ERR, RTE_LOGTYPE_PMD, "%s: %s", func_name, buffer);
+}
+
+/* Macros for checking for restricting functions to primary instance only */
+#define RTE_PROC_PRIMARY_OR_ERR_RET(retval) do { \
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) { \
+ RTE_PMD_DEBUG_TRACE("Cannot run in secondary processes\n"); \
+ return retval; \
+ } \
+} while (0)
+
+#define RTE_PROC_PRIMARY_OR_RET() do { \
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) { \
+ RTE_PMD_DEBUG_TRACE("Cannot run in secondary processes\n"); \
+ return; \
+ } \
+} while (0)
+
+/* Macros to check for invalid function pointers */
+#define RTE_FUNC_PTR_OR_ERR_RET(func, retval) do { \
+ if ((func) == NULL) { \
+ RTE_PMD_DEBUG_TRACE("Function not supported\n"); \
+ return retval; \
+ } \
+} while (0)
+
+#define RTE_FUNC_PTR_OR_RET(func) do { \
+ if ((func) == NULL) { \
+ RTE_PMD_DEBUG_TRACE("Function not supported\n"); \
+ return; \
+ } \
+} while (0)
+
+
+/** Double linked list of device drivers. */
+TAILQ_HEAD(rte_driver_list, rte_driver);
+
+/**
+ * Initialization function called for each device driver once.
+ */
+typedef int (rte_dev_init_t)(const char *name, const char *args);
+
+/**
+ * Uninitilization function called for each device driver once.
+ */
+typedef int (rte_dev_uninit_t)(const char *name);
+
+/**
+ * Driver type enumeration
+ */
+enum pmd_type {
+ PMD_VDEV = 0,
+ PMD_PDEV = 1,
+};
+
+/**
+ * A structure describing a device driver.
+ */
+struct rte_driver {
+ TAILQ_ENTRY(rte_driver) next; /**< Next in list. */
+ enum pmd_type type; /**< PMD Driver type */
+ const char *name; /**< Driver name. */
+ rte_dev_init_t *init; /**< Device init. function. */
+ rte_dev_uninit_t *uninit; /**< Device uninit. function. */
+};
+
+/**
+ * Register a device driver.
+ *
+ * @param driver
+ * A pointer to a rte_dev structure describing the driver
+ * to be registered.
+ */
+void rte_eal_driver_register(struct rte_driver *driver);
+
+/**
+ * Unregister a device driver.
+ *
+ * @param driver
+ * A pointer to a rte_dev structure describing the driver
+ * to be unregistered.
+ */
+void rte_eal_driver_unregister(struct rte_driver *driver);
+
+/**
+ * Initalize all the registered drivers in this process
+ */
+int rte_eal_dev_init(void);
+
+/**
+ * Initialize a driver specified by name.
+ *
+ * @param name
+ * The pointer to a driver name to be initialized.
+ * @param args
+ * The pointer to arguments used by driver initialization.
+ * @return
+ * 0 on success, negative on error
+ */
+int rte_eal_vdev_init(const char *name, const char *args);
+
+/**
+ * Uninitalize a driver specified by name.
+ *
+ * @param name
+ * The pointer to a driver name to be initialized.
+ * @return
+ * 0 on success, negative on error
+ */
+int rte_eal_vdev_uninit(const char *name);
+
+#define PMD_REGISTER_DRIVER(d)\
+void devinitfn_ ##d(void);\
+void __attribute__((constructor, used)) devinitfn_ ##d(void)\
+{\
+ rte_eal_driver_register(&d);\
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_VDEV_H_ */
diff --git a/lib/librte_eal/common/include/rte_devargs.h b/lib/librte_eal/common/include/rte_devargs.h
new file mode 100644
index 00000000..53c59f56
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_devargs.h
@@ -0,0 +1,177 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2014 6WIND S.A.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A nor the names of its contributors
+ * may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_DEVARGS_H_
+#define _RTE_DEVARGS_H_
+
+/**
+ * @file
+ *
+ * RTE devargs: list of devices and their user arguments
+ *
+ * This file stores a list of devices and their arguments given by
+ * the user when a DPDK application is started. These devices can be PCI
+ * devices or virtual devices. These devices are stored at startup in a
+ * list of rte_devargs structures.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdio.h>
+#include <sys/queue.h>
+#include <rte_pci.h>
+
+/**
+ * Type of generic device
+ */
+enum rte_devtype {
+ RTE_DEVTYPE_WHITELISTED_PCI,
+ RTE_DEVTYPE_BLACKLISTED_PCI,
+ RTE_DEVTYPE_VIRTUAL,
+};
+
+/**
+ * Structure that stores a device given by the user with its arguments
+ *
+ * A user device is a physical or a virtual device given by the user to
+ * the DPDK application at startup through command line arguments.
+ *
+ * The structure stores the configuration of the device, its PCI
+ * identifier if it's a PCI device or the driver name if it's a virtual
+ * device.
+ */
+struct rte_devargs {
+ /** Next in list. */
+ TAILQ_ENTRY(rte_devargs) next;
+ /** Type of device. */
+ enum rte_devtype type;
+ union {
+ /** Used if type is RTE_DEVTYPE_*_PCI. */
+ struct {
+ /** PCI location. */
+ struct rte_pci_addr addr;
+ } pci;
+ /** Used if type is RTE_DEVTYPE_VIRTUAL. */
+ struct {
+ /** Driver name. */
+ char drv_name[32];
+ } virt;
+ };
+ /** Arguments string as given by user or "" for no argument. */
+ char *args;
+};
+
+/** user device double-linked queue type definition */
+TAILQ_HEAD(rte_devargs_list, rte_devargs);
+
+/** Global list of user devices */
+extern struct rte_devargs_list devargs_list;
+
+/**
+ * Parse a devargs string.
+ *
+ * For PCI devices, the format of arguments string is "PCI_ADDR" or
+ * "PCI_ADDR,key=val,key2=val2,...". Examples: "08:00.1", "0000:5:00.0",
+ * "04:00.0,arg=val".
+ *
+ * For virtual devices, the format of arguments string is "DRIVER_NAME*"
+ * or "DRIVER_NAME*,key=val,key2=val2,...". Examples: "eth_ring",
+ * "eth_ring0", "eth_pmdAnything,arg=0:arg2=1".
+ *
+ * The function parses the arguments string to get driver name and driver
+ * arguments.
+ *
+ * @param devargs_str
+ * The arguments as given by the user.
+ * @param drvname
+ * The pointer to the string to store parsed driver name.
+ * @param drvargs
+ * The pointer to the string to store parsed driver arguments.
+ *
+ * @return
+ * - 0 on success
+ * - A negative value on error
+ */
+int rte_eal_parse_devargs_str(const char *devargs_str,
+ char **drvname, char **drvargs);
+
+/**
+ * Add a device to the user device list
+ *
+ * For PCI devices, the format of arguments string is "PCI_ADDR" or
+ * "PCI_ADDR,key=val,key2=val2,...". Examples: "08:00.1", "0000:5:00.0",
+ * "04:00.0,arg=val".
+ *
+ * For virtual devices, the format of arguments string is "DRIVER_NAME*"
+ * or "DRIVER_NAME*,key=val,key2=val2,...". Examples: "eth_ring",
+ * "eth_ring0", "eth_pmdAnything,arg=0:arg2=1". The validity of the
+ * driver name is not checked by this function, it is done when probing
+ * the drivers.
+ *
+ * @param devtype
+ * The type of the device.
+ * @param devargs_str
+ * The arguments as given by the user.
+ *
+ * @return
+ * - 0 on success
+ * - A negative value on error
+ */
+int rte_eal_devargs_add(enum rte_devtype devtype, const char *devargs_str);
+
+/**
+ * Count the number of user devices of a specified type
+ *
+ * @param devtype
+ * The type of the devices to counted.
+ *
+ * @return
+ * The number of devices.
+ */
+unsigned int
+rte_eal_devargs_type_count(enum rte_devtype devtype);
+
+/**
+ * This function dumps the list of user device and their arguments.
+ *
+ * @param f
+ * A pointer to a file for output
+ */
+void rte_eal_devargs_dump(FILE *f);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_DEVARGS_H_ */
diff --git a/lib/librte_eal/common/include/rte_eal.h b/lib/librte_eal/common/include/rte_eal.h
new file mode 100644
index 00000000..a71d6f57
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_eal.h
@@ -0,0 +1,259 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_EAL_H_
+#define _RTE_EAL_H_
+
+/**
+ * @file
+ *
+ * EAL Configuration API
+ */
+
+#include <stdint.h>
+#include <sched.h>
+
+#include <rte_per_lcore.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define RTE_MAGIC 19820526 /**< Magic number written by the main partition when ready. */
+
+/* Maximum thread_name length. */
+#define RTE_MAX_THREAD_NAME_LEN 16
+
+/**
+ * The lcore role (used in RTE or not).
+ */
+enum rte_lcore_role_t {
+ ROLE_RTE,
+ ROLE_OFF,
+};
+
+/**
+ * The type of process in a linuxapp, multi-process setup
+ */
+enum rte_proc_type_t {
+ RTE_PROC_AUTO = -1, /* allow auto-detection of primary/secondary */
+ RTE_PROC_PRIMARY = 0, /* set to zero, so primary is the default */
+ RTE_PROC_SECONDARY,
+
+ RTE_PROC_INVALID
+};
+
+/**
+ * The global RTE configuration structure.
+ */
+struct rte_config {
+ uint32_t master_lcore; /**< Id of the master lcore */
+ uint32_t lcore_count; /**< Number of available logical cores. */
+ enum rte_lcore_role_t lcore_role[RTE_MAX_LCORE]; /**< State of cores. */
+
+ /** Primary or secondary configuration */
+ enum rte_proc_type_t process_type;
+
+ /**
+ * Pointer to memory configuration, which may be shared across multiple
+ * DPDK instances
+ */
+ struct rte_mem_config *mem_config;
+} __attribute__((__packed__));
+
+/**
+ * Get the global configuration structure.
+ *
+ * @return
+ * A pointer to the global configuration structure.
+ */
+struct rte_config *rte_eal_get_configuration(void);
+
+/**
+ * Get a lcore's role.
+ *
+ * @param lcore_id
+ * The identifier of the lcore.
+ * @return
+ * The role of the lcore.
+ */
+enum rte_lcore_role_t rte_eal_lcore_role(unsigned lcore_id);
+
+
+/**
+ * Get the process type in a multi-process setup
+ *
+ * @return
+ * The process type
+ */
+enum rte_proc_type_t rte_eal_process_type(void);
+
+/**
+ * Request iopl privilege for all RPL.
+ *
+ * This function should be called by pmds which need access to ioports.
+
+ * @return
+ * - On success, returns 0.
+ * - On failure, returns -1.
+ */
+int rte_eal_iopl_init(void);
+
+/**
+ * Initialize the Environment Abstraction Layer (EAL).
+ *
+ * This function is to be executed on the MASTER lcore only, as soon
+ * as possible in the application's main() function.
+ *
+ * The function finishes the initialization process before main() is called.
+ * It puts the SLAVE lcores in the WAIT state.
+ *
+ * When the multi-partition feature is supported, depending on the
+ * configuration (if CONFIG_RTE_EAL_MAIN_PARTITION is disabled), this
+ * function waits to ensure that the magic number is set before
+ * returning. See also the rte_eal_get_configuration() function. Note:
+ * This behavior may change in the future.
+ *
+ * @param argc
+ * The argc argument that was given to the main() function.
+ * @param argv
+ * The argv argument that was given to the main() function.
+ * @return
+ * - On success, the number of parsed arguments, which is greater or
+ * equal to zero. After the call to rte_eal_init(),
+ * all arguments argv[x] with x < ret may be modified and should
+ * not be accessed by the application.
+ * - On failure, a negative error value.
+ */
+int rte_eal_init(int argc, char **argv);
+
+/**
+ * Check if a primary process is currently alive
+ *
+ * This function returns true when a primary process is currently
+ * active.
+ *
+ * @param config_file_path
+ * The config_file_path argument provided should point at the location
+ * that the primary process will create its config file. If NULL, the default
+ * config file path is used.
+ *
+ * @return
+ * - If alive, returns 1.
+ * - If dead, returns 0.
+ */
+int rte_eal_primary_proc_alive(const char *config_file_path);
+
+/**
+ * Usage function typedef used by the application usage function.
+ *
+ * Use this function typedef to define and call rte_set_applcation_usage_hook()
+ * routine.
+ */
+typedef void (*rte_usage_hook_t)(const char * prgname);
+
+/**
+ * Add application usage routine callout from the eal_usage() routine.
+ *
+ * This function allows the application to include its usage message
+ * in the EAL system usage message. The routine rte_set_application_usage_hook()
+ * needs to be called before the rte_eal_init() routine in the application.
+ *
+ * This routine is optional for the application and will behave as if the set
+ * routine was never called as the default behavior.
+ *
+ * @param usage_func
+ * The func argument is a function pointer to the application usage routine.
+ * Called function is defined using rte_usage_hook_t typedef, which is of
+ * the form void rte_usage_func(const char * prgname).
+ *
+ * Calling this routine with a NULL value will reset the usage hook routine and
+ * return the current value, which could be NULL.
+ * @return
+ * - Returns the current value of the rte_application_usage pointer to allow
+ * the caller to daisy chain the usage routines if needing more then one.
+ */
+rte_usage_hook_t
+rte_set_application_usage_hook(rte_usage_hook_t usage_func);
+
+/**
+ * macro to get the lock of tailq in mem_config
+ */
+#define RTE_EAL_TAILQ_RWLOCK (&rte_eal_get_configuration()->mem_config->qlock)
+
+/**
+ * macro to get the multiple lock of mempool shared by mutiple-instance
+ */
+#define RTE_EAL_MEMPOOL_RWLOCK (&rte_eal_get_configuration()->mem_config->mplock)
+
+/**
+ * Whether EAL is using huge pages (disabled by --no-huge option).
+ * The no-huge mode cannot be used with UIO poll-mode drivers like igb/ixgbe.
+ * It is useful for NIC drivers (e.g. librte_pmd_mlx4, librte_pmd_vmxnet3) or
+ * crypto drivers (e.g. librte_crypto_nitrox) provided by third-parties such
+ * as 6WIND.
+ *
+ * @return
+ * Nonzero if hugepages are enabled.
+ */
+int rte_eal_has_hugepages(void);
+
+/**
+ * A wrap API for syscall gettid.
+ *
+ * @return
+ * On success, returns the thread ID of calling process.
+ * It is always successful.
+ */
+int rte_sys_gettid(void);
+
+/**
+ * Get system unique thread id.
+ *
+ * @return
+ * On success, returns the thread ID of calling process.
+ * It is always successful.
+ */
+static inline int rte_gettid(void)
+{
+ static RTE_DEFINE_PER_LCORE(int, _thread_id) = -1;
+ if (RTE_PER_LCORE(_thread_id) == -1)
+ RTE_PER_LCORE(_thread_id) = rte_sys_gettid();
+ return RTE_PER_LCORE(_thread_id);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_EAL_H_ */
diff --git a/lib/librte_eal/common/include/rte_eal_memconfig.h b/lib/librte_eal/common/include/rte_eal_memconfig.h
new file mode 100644
index 00000000..2b5e0b17
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_eal_memconfig.h
@@ -0,0 +1,100 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_EAL_MEMCONFIG_H_
+#define _RTE_EAL_MEMCONFIG_H_
+
+#include <rte_tailq.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_malloc_heap.h>
+#include <rte_rwlock.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * the structure for the memory configuration for the RTE.
+ * Used by the rte_config structure. It is separated out, as for multi-process
+ * support, the memory details should be shared across instances
+ */
+struct rte_mem_config {
+ volatile uint32_t magic; /**< Magic number - Sanity check. */
+
+ /* memory topology */
+ uint32_t nchannel; /**< Number of channels (0 if unknown). */
+ uint32_t nrank; /**< Number of ranks (0 if unknown). */
+
+ /**
+ * current lock nest order
+ * - qlock->mlock (ring/hash/lpm)
+ * - mplock->qlock->mlock (mempool)
+ * Notice:
+ * *ALWAYS* obtain qlock first if having to obtain both qlock and mlock
+ */
+ rte_rwlock_t mlock; /**< only used by memzone LIB for thread-safe. */
+ rte_rwlock_t qlock; /**< used for tailq operation for thread safe. */
+ rte_rwlock_t mplock; /**< only used by mempool LIB for thread-safe. */
+
+ uint32_t memzone_cnt; /**< Number of allocated memzones */
+
+ /* memory segments and zones */
+ struct rte_memseg memseg[RTE_MAX_MEMSEG]; /**< Physmem descriptors. */
+ struct rte_memzone memzone[RTE_MAX_MEMZONE]; /**< Memzone descriptors. */
+
+ struct rte_tailq_head tailq_head[RTE_MAX_TAILQ]; /**< Tailqs for objects */
+
+ /* Heaps of Malloc per socket */
+ struct malloc_heap malloc_heaps[RTE_MAX_NUMA_NODES];
+
+ /* address of mem_config in primary process. used to map shared config into
+ * exact same address the primary process maps it.
+ */
+ uint64_t mem_cfg_addr;
+} __attribute__((__packed__));
+
+
+inline static void
+rte_eal_mcfg_wait_complete(struct rte_mem_config* mcfg)
+{
+ /* wait until shared mem_config finish initialising */
+ while(mcfg->magic != RTE_MAGIC)
+ rte_pause();
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /*__RTE_EAL_MEMCONFIG_H_*/
diff --git a/lib/librte_eal/common/include/rte_errno.h b/lib/librte_eal/common/include/rte_errno.h
new file mode 100644
index 00000000..2e5cc454
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_errno.h
@@ -0,0 +1,95 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @file
+ *
+ * API for error cause tracking
+ */
+
+#ifndef _RTE_ERRNO_H_
+#define _RTE_ERRNO_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_per_lcore.h>
+
+RTE_DECLARE_PER_LCORE(int, _rte_errno); /**< Per core error number. */
+
+/**
+ * Error number value, stored per-thread, which can be queried after
+ * calls to certain functions to determine why those functions failed.
+ *
+ * Uses standard values from errno.h wherever possible, with a small number
+ * of additional possible values for RTE-specific conditions.
+ */
+#define rte_errno RTE_PER_LCORE(_rte_errno)
+
+/**
+ * Function which returns a printable string describing a particular
+ * error code. For non-RTE-specific error codes, this function returns
+ * the value from the libc strerror function.
+ *
+ * @param errnum
+ * The error number to be looked up - generally the value of rte_errno
+ * @return
+ * A pointer to a thread-local string containing the text describing
+ * the error.
+ */
+const char *rte_strerror(int errnum);
+
+#ifndef __ELASTERROR
+/**
+ * Check if we have a defined value for the max system-defined errno values.
+ * if no max defined, start from 1000 to prevent overlap with standard values
+ */
+#define __ELASTERROR 1000
+#endif
+
+/** Error types */
+enum {
+ RTE_MIN_ERRNO = __ELASTERROR, /**< Start numbering above std errno vals */
+
+ E_RTE_SECONDARY, /**< Operation not allowed in secondary processes */
+ E_RTE_NO_CONFIG, /**< Missing rte_config */
+
+ RTE_MAX_ERRNO /**< Max RTE error number */
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_ERRNO_H_ */
diff --git a/lib/librte_eal/common/include/rte_hexdump.h b/lib/librte_eal/common/include/rte_hexdump.h
new file mode 100644
index 00000000..5c18a50b
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_hexdump.h
@@ -0,0 +1,89 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_HEXDUMP_H_
+#define _RTE_HEXDUMP_H_
+
+/**
+ * @file
+ * Simple API to dump out memory in a special hex format.
+ */
+
+#include <stdio.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+* Dump out memory in a special hex dump format.
+*
+* @param f
+* A pointer to a file for output
+* @param title
+* If not NULL this string is printed as a header to the output.
+* @param buf
+* This is the buffer address to print out.
+* @param len
+* The number of bytes to dump out
+* @return
+* None.
+*/
+
+extern void
+rte_hexdump(FILE *f, const char * title, const void * buf, unsigned int len);
+
+/**
+* Dump out memory in a hex format with colons between bytes.
+*
+* @param f
+* A pointer to a file for output
+* @param title
+* If not NULL this string is printed as a header to the output.
+* @param buf
+* This is the buffer address to print out.
+* @param len
+* The number of bytes to dump out
+* @return
+* None.
+*/
+
+void
+rte_memdump(FILE *f, const char * title, const void * buf, unsigned int len);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_HEXDUMP_H_ */
diff --git a/lib/librte_eal/common/include/rte_interrupts.h b/lib/librte_eal/common/include/rte_interrupts.h
new file mode 100644
index 00000000..ff11ef3a
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_interrupts.h
@@ -0,0 +1,120 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_INTERRUPTS_H_
+#define _RTE_INTERRUPTS_H_
+
+/**
+ * @file
+ *
+ * The RTE interrupt interface provides functions to register/unregister
+ * callbacks for a specific interrupt.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** Interrupt handle */
+struct rte_intr_handle;
+
+/** Function to be registered for the specific interrupt */
+typedef void (*rte_intr_callback_fn)(struct rte_intr_handle *intr_handle,
+ void *cb_arg);
+
+#include <exec-env/rte_interrupts.h>
+
+/**
+ * It registers the callback for the specific interrupt. Multiple
+ * callbacks cal be registered at the same time.
+ * @param intr_handle
+ * Pointer to the interrupt handle.
+ * @param cb
+ * callback address.
+ * @param cb_arg
+ * address of parameter for callback.
+ *
+ * @return
+ * - On success, zero.
+ * - On failure, a negative value.
+ */
+int rte_intr_callback_register(struct rte_intr_handle *intr_handle,
+ rte_intr_callback_fn cb, void *cb_arg);
+
+/**
+ * It unregisters the callback according to the specified interrupt handle.
+ *
+ * @param intr_handle
+ * pointer to the interrupt handle.
+ * @param cb
+ * callback address.
+ * @param cb_arg
+ * address of parameter for callback, (void *)-1 means to remove all
+ * registered which has the same callback address.
+ *
+ * @return
+ * - On success, return the number of callback entities removed.
+ * - On failure, a negative value.
+ */
+int rte_intr_callback_unregister(struct rte_intr_handle *intr_handle,
+ rte_intr_callback_fn cb, void *cb_arg);
+
+/**
+ * It enables the interrupt for the specified handle.
+ *
+ * @param intr_handle
+ * pointer to the interrupt handle.
+ *
+ * @return
+ * - On success, zero.
+ * - On failure, a negative value.
+ */
+int rte_intr_enable(struct rte_intr_handle *intr_handle);
+
+/**
+ * It disables the interrupt for the specified handle.
+ *
+ * @param intr_handle
+ * pointer to the interrupt handle.
+ *
+ * @return
+ * - On success, zero.
+ * - On failure, a negative value.
+ */
+int rte_intr_disable(struct rte_intr_handle *intr_handle);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/lib/librte_eal/common/include/rte_keepalive.h b/lib/librte_eal/common/include/rte_keepalive.h
new file mode 100644
index 00000000..10dac2e0
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_keepalive.h
@@ -0,0 +1,108 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2015 Intel Shannon Ltd. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @file rte_keepalive.h
+ * DPDK RTE LCore Keepalive Monitor.
+ *
+ **/
+
+#ifndef _KEEPALIVE_H_
+#define _KEEPALIVE_H_
+
+#include <rte_memory.h>
+
+#ifndef RTE_KEEPALIVE_MAXCORES
+/**
+ * Number of cores to track.
+ * @note Must be larger than the highest core id. */
+#define RTE_KEEPALIVE_MAXCORES RTE_MAX_LCORE
+#endif
+
+/**
+ * Keepalive failure callback.
+ *
+ * Receives a data pointer passed to rte_keepalive_create() and the id of the
+ * failed core.
+ */
+typedef void (*rte_keepalive_failure_callback_t)(
+ void *data,
+ const int id_core);
+
+/**
+ * Keepalive state structure.
+ * @internal
+ */
+struct rte_keepalive;
+
+/**
+ * Initialise keepalive sub-system.
+ * @param callback
+ * Function called upon detection of a dead core.
+ * @param data
+ * Data pointer to be passed to function callback.
+ * @return
+ * Keepalive structure success, NULL on failure.
+ */
+struct rte_keepalive *rte_keepalive_create(
+ rte_keepalive_failure_callback_t callback,
+ void *data);
+
+/**
+ * Checks & handles keepalive state of monitored cores.
+ * @param *ptr_timer Triggering timer (unused)
+ * @param *ptr_data Data pointer (keepalive structure)
+ */
+void rte_keepalive_dispatch_pings(void *ptr_timer, void *ptr_data);
+
+/**
+ * Registers a core for keepalive checks.
+ * @param *keepcfg
+ * Keepalive structure pointer
+ * @param id_core
+ * ID number of core to register.
+ */
+void rte_keepalive_register_core(struct rte_keepalive *keepcfg,
+ const int id_core);
+
+/**
+ * Per-core keepalive check.
+ * @param *keepcfg
+ * Keepalive structure pointer
+ *
+ * This function needs to be called from within the main process loop of
+ * the LCore to be checked.
+ */
+void
+rte_keepalive_mark_alive(struct rte_keepalive *keepcfg);
+
+#endif /* _KEEPALIVE_H_ */
diff --git a/lib/librte_eal/common/include/rte_launch.h b/lib/librte_eal/common/include/rte_launch.h
new file mode 100644
index 00000000..dd1946da
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_launch.h
@@ -0,0 +1,177 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_LAUNCH_H_
+#define _RTE_LAUNCH_H_
+
+/**
+ * @file
+ *
+ * Launch tasks on other lcores
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * State of an lcore.
+ */
+enum rte_lcore_state_t {
+ WAIT, /**< waiting a new command */
+ RUNNING, /**< executing command */
+ FINISHED, /**< command executed */
+};
+
+/**
+ * Definition of a remote launch function.
+ */
+typedef int (lcore_function_t)(void *);
+
+/**
+ * Launch a function on another lcore.
+ *
+ * To be executed on the MASTER lcore only.
+ *
+ * Sends a message to a slave lcore (identified by the slave_id) that
+ * is in the WAIT state (this is true after the first call to
+ * rte_eal_init()). This can be checked by first calling
+ * rte_eal_wait_lcore(slave_id).
+ *
+ * When the remote lcore receives the message, it switches to
+ * the RUNNING state, then calls the function f with argument arg. Once the
+ * execution is done, the remote lcore switches to a FINISHED state and
+ * the return value of f is stored in a local variable to be read using
+ * rte_eal_wait_lcore().
+ *
+ * The MASTER lcore returns as soon as the message is sent and knows
+ * nothing about the completion of f.
+ *
+ * Note: This function is not designed to offer optimum
+ * performance. It is just a practical way to launch a function on
+ * another lcore at initialization time.
+ *
+ * @param f
+ * The function to be called.
+ * @param arg
+ * The argument for the function.
+ * @param slave_id
+ * The identifier of the lcore on which the function should be executed.
+ * @return
+ * - 0: Success. Execution of function f started on the remote lcore.
+ * - (-EBUSY): The remote lcore is not in a WAIT state.
+ */
+int rte_eal_remote_launch(lcore_function_t *f, void *arg, unsigned slave_id);
+
+/**
+ * This enum indicates whether the master core must execute the handler
+ * launched on all logical cores.
+ */
+enum rte_rmt_call_master_t {
+ SKIP_MASTER = 0, /**< lcore handler not executed by master core. */
+ CALL_MASTER, /**< lcore handler executed by master core. */
+};
+
+/**
+ * Launch a function on all lcores.
+ *
+ * Check that each SLAVE lcore is in a WAIT state, then call
+ * rte_eal_remote_launch() for each lcore.
+ *
+ * @param f
+ * The function to be called.
+ * @param arg
+ * The argument for the function.
+ * @param call_master
+ * If call_master set to SKIP_MASTER, the MASTER lcore does not call
+ * the function. If call_master is set to CALL_MASTER, the function
+ * is also called on master before returning. In any case, the master
+ * lcore returns as soon as it finished its job and knows nothing
+ * about the completion of f on the other lcores.
+ * @return
+ * - 0: Success. Execution of function f started on all remote lcores.
+ * - (-EBUSY): At least one remote lcore is not in a WAIT state. In this
+ * case, no message is sent to any of the lcores.
+ */
+int rte_eal_mp_remote_launch(lcore_function_t *f, void *arg,
+ enum rte_rmt_call_master_t call_master);
+
+/**
+ * Get the state of the lcore identified by slave_id.
+ *
+ * To be executed on the MASTER lcore only.
+ *
+ * @param slave_id
+ * The identifier of the lcore.
+ * @return
+ * The state of the lcore.
+ */
+enum rte_lcore_state_t rte_eal_get_lcore_state(unsigned slave_id);
+
+/**
+ * Wait until an lcore finishes its job.
+ *
+ * To be executed on the MASTER lcore only.
+ *
+ * If the slave lcore identified by the slave_id is in a FINISHED state,
+ * switch to the WAIT state. If the lcore is in RUNNING state, wait until
+ * the lcore finishes its job and moves to the FINISHED state.
+ *
+ * @param slave_id
+ * The identifier of the lcore.
+ * @return
+ * - 0: If the lcore identified by the slave_id is in a WAIT state.
+ * - The value that was returned by the previous remote launch
+ * function call if the lcore identified by the slave_id was in a
+ * FINISHED or RUNNING state. In this case, it changes the state
+ * of the lcore to WAIT.
+ */
+int rte_eal_wait_lcore(unsigned slave_id);
+
+/**
+ * Wait until all lcores finish their jobs.
+ *
+ * To be executed on the MASTER lcore only. Issue an
+ * rte_eal_wait_lcore() for every lcore. The return values are
+ * ignored.
+ *
+ * After a call to rte_eal_mp_wait_lcore(), the caller can assume
+ * that all slave lcores are in a WAIT state.
+ */
+void rte_eal_mp_wait_lcore(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_LAUNCH_H_ */
diff --git a/lib/librte_eal/common/include/rte_lcore.h b/lib/librte_eal/common/include/rte_lcore.h
new file mode 100644
index 00000000..ac151302
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_lcore.h
@@ -0,0 +1,276 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_LCORE_H_
+#define _RTE_LCORE_H_
+
+/**
+ * @file
+ *
+ * API for lcore and socket manipulation
+ *
+ */
+#include <rte_per_lcore.h>
+#include <rte_eal.h>
+#include <rte_launch.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define LCORE_ID_ANY UINT32_MAX /**< Any lcore. */
+
+#if defined(__linux__)
+ typedef cpu_set_t rte_cpuset_t;
+#elif defined(__FreeBSD__)
+#include <pthread_np.h>
+ typedef cpuset_t rte_cpuset_t;
+#endif
+
+/**
+ * Structure storing internal configuration (per-lcore)
+ */
+struct lcore_config {
+ unsigned detected; /**< true if lcore was detected */
+ pthread_t thread_id; /**< pthread identifier */
+ int pipe_master2slave[2]; /**< communication pipe with master */
+ int pipe_slave2master[2]; /**< communication pipe with master */
+ lcore_function_t * volatile f; /**< function to call */
+ void * volatile arg; /**< argument of function */
+ volatile int ret; /**< return value of function */
+ volatile enum rte_lcore_state_t state; /**< lcore state */
+ unsigned socket_id; /**< physical socket id for this lcore */
+ unsigned core_id; /**< core number on socket for this lcore */
+ int core_index; /**< relative index, starting from 0 */
+ rte_cpuset_t cpuset; /**< cpu set which the lcore affinity to */
+};
+
+/**
+ * Internal configuration (per-lcore)
+ */
+extern struct lcore_config lcore_config[RTE_MAX_LCORE];
+
+RTE_DECLARE_PER_LCORE(unsigned, _lcore_id); /**< Per thread "lcore id". */
+RTE_DECLARE_PER_LCORE(rte_cpuset_t, _cpuset); /**< Per thread "cpuset". */
+
+/**
+ * Return the ID of the execution unit we are running on.
+ * @return
+ * Logical core ID (in EAL thread) or LCORE_ID_ANY (in non-EAL thread)
+ */
+static inline unsigned
+rte_lcore_id(void)
+{
+ return RTE_PER_LCORE(_lcore_id);
+}
+
+/**
+ * Get the id of the master lcore
+ *
+ * @return
+ * the id of the master lcore
+ */
+static inline unsigned
+rte_get_master_lcore(void)
+{
+ return rte_eal_get_configuration()->master_lcore;
+}
+
+/**
+ * Return the number of execution units (lcores) on the system.
+ *
+ * @return
+ * the number of execution units (lcores) on the system.
+ */
+static inline unsigned
+rte_lcore_count(void)
+{
+ const struct rte_config *cfg = rte_eal_get_configuration();
+ return cfg->lcore_count;
+}
+
+/**
+ * Return the index of the lcore starting from zero.
+ * The order is physical or given by command line (-l option).
+ *
+ * @param lcore_id
+ * The targeted lcore, or -1 for the current one.
+ * @return
+ * The relative index, or -1 if not enabled.
+ */
+static inline int
+rte_lcore_index(int lcore_id)
+{
+ if (lcore_id >= RTE_MAX_LCORE)
+ return -1;
+ if (lcore_id < 0)
+ lcore_id = rte_lcore_id();
+ return lcore_config[lcore_id].core_index;
+}
+
+/**
+ * Return the ID of the physical socket of the logical core we are
+ * running on.
+ * @return
+ * the ID of current lcoreid's physical socket
+ */
+unsigned rte_socket_id(void);
+
+/**
+ * Get the ID of the physical socket of the specified lcore
+ *
+ * @param lcore_id
+ * the targeted lcore, which MUST be between 0 and RTE_MAX_LCORE-1.
+ * @return
+ * the ID of lcoreid's physical socket
+ */
+static inline unsigned
+rte_lcore_to_socket_id(unsigned lcore_id)
+{
+ return lcore_config[lcore_id].socket_id;
+}
+
+/**
+ * Test if an lcore is enabled.
+ *
+ * @param lcore_id
+ * The identifier of the lcore, which MUST be between 0 and
+ * RTE_MAX_LCORE-1.
+ * @return
+ * True if the given lcore is enabled; false otherwise.
+ */
+static inline int
+rte_lcore_is_enabled(unsigned lcore_id)
+{
+ struct rte_config *cfg = rte_eal_get_configuration();
+ if (lcore_id >= RTE_MAX_LCORE)
+ return 0;
+ return cfg->lcore_role[lcore_id] != ROLE_OFF;
+}
+
+/**
+ * Get the next enabled lcore ID.
+ *
+ * @param i
+ * The current lcore (reference).
+ * @param skip_master
+ * If true, do not return the ID of the master lcore.
+ * @param wrap
+ * If true, go back to 0 when RTE_MAX_LCORE is reached; otherwise,
+ * return RTE_MAX_LCORE.
+ * @return
+ * The next lcore_id or RTE_MAX_LCORE if not found.
+ */
+static inline unsigned
+rte_get_next_lcore(unsigned i, int skip_master, int wrap)
+{
+ i++;
+ if (wrap)
+ i %= RTE_MAX_LCORE;
+
+ while (i < RTE_MAX_LCORE) {
+ if (!rte_lcore_is_enabled(i) ||
+ (skip_master && (i == rte_get_master_lcore()))) {
+ i++;
+ if (wrap)
+ i %= RTE_MAX_LCORE;
+ continue;
+ }
+ break;
+ }
+ return i;
+}
+/**
+ * Macro to browse all running lcores.
+ */
+#define RTE_LCORE_FOREACH(i) \
+ for (i = rte_get_next_lcore(-1, 0, 0); \
+ i<RTE_MAX_LCORE; \
+ i = rte_get_next_lcore(i, 0, 0))
+
+/**
+ * Macro to browse all running lcores except the master lcore.
+ */
+#define RTE_LCORE_FOREACH_SLAVE(i) \
+ for (i = rte_get_next_lcore(-1, 1, 0); \
+ i<RTE_MAX_LCORE; \
+ i = rte_get_next_lcore(i, 1, 0))
+
+/**
+ * Set core affinity of the current thread.
+ * Support both EAL and non-EAL thread and update TLS.
+ *
+ * @param cpusetp
+ * Point to cpu_set_t for setting current thread affinity.
+ * @return
+ * On success, return 0; otherwise return -1;
+ */
+int rte_thread_set_affinity(rte_cpuset_t *cpusetp);
+
+/**
+ * Get core affinity of the current thread.
+ *
+ * @param cpusetp
+ * Point to cpu_set_t for getting current thread cpu affinity.
+ * It presumes input is not NULL, otherwise it causes panic.
+ *
+ */
+void rte_thread_get_affinity(rte_cpuset_t *cpusetp);
+
+/**
+ * Set thread names.
+ *
+ * Macro to wrap `pthread_setname_np()` with a glibc version check.
+ * Only glibc >= 2.12 supports this feature.
+ *
+ * This macro only used for Linux, BSD does direct libc call.
+ * BSD libc version of function is `pthread_set_name_np()`.
+ */
+#if defined(__DOXYGEN__)
+#define rte_thread_setname(...) pthread_setname_np(__VA_ARGS__)
+#endif
+
+#if defined(__GLIBC__) && defined(__GLIBC_PREREQ)
+#if __GLIBC_PREREQ(2, 12)
+#define rte_thread_setname(...) pthread_setname_np(__VA_ARGS__)
+#else
+#define rte_thread_setname(...) 0
+#endif
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* _RTE_LCORE_H_ */
diff --git a/lib/librte_eal/common/include/rte_log.h b/lib/librte_eal/common/include/rte_log.h
new file mode 100644
index 00000000..2e47e7f6
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_log.h
@@ -0,0 +1,311 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_LOG_H_
+#define _RTE_LOG_H_
+
+/**
+ * @file
+ *
+ * RTE Logs API
+ *
+ * This file provides a log API to RTE applications.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdarg.h>
+
+/** The rte_log structure. */
+struct rte_logs {
+ uint32_t type; /**< Bitfield with enabled logs. */
+ uint32_t level; /**< Log level. */
+ FILE *file; /**< Pointer to current FILE* for logs. */
+};
+
+/** Global log informations */
+extern struct rte_logs rte_logs;
+
+/* SDK log type */
+#define RTE_LOGTYPE_EAL 0x00000001 /**< Log related to eal. */
+#define RTE_LOGTYPE_MALLOC 0x00000002 /**< Log related to malloc. */
+#define RTE_LOGTYPE_RING 0x00000004 /**< Log related to ring. */
+#define RTE_LOGTYPE_MEMPOOL 0x00000008 /**< Log related to mempool. */
+#define RTE_LOGTYPE_TIMER 0x00000010 /**< Log related to timers. */
+#define RTE_LOGTYPE_PMD 0x00000020 /**< Log related to poll mode driver. */
+#define RTE_LOGTYPE_HASH 0x00000040 /**< Log related to hash table. */
+#define RTE_LOGTYPE_LPM 0x00000080 /**< Log related to LPM. */
+#define RTE_LOGTYPE_KNI 0x00000100 /**< Log related to KNI. */
+#define RTE_LOGTYPE_ACL 0x00000200 /**< Log related to ACL. */
+#define RTE_LOGTYPE_POWER 0x00000400 /**< Log related to power. */
+#define RTE_LOGTYPE_METER 0x00000800 /**< Log related to QoS meter. */
+#define RTE_LOGTYPE_SCHED 0x00001000 /**< Log related to QoS port scheduler. */
+#define RTE_LOGTYPE_PORT 0x00002000 /**< Log related to port. */
+#define RTE_LOGTYPE_TABLE 0x00004000 /**< Log related to table. */
+#define RTE_LOGTYPE_PIPELINE 0x00008000 /**< Log related to pipeline. */
+#define RTE_LOGTYPE_MBUF 0x00010000 /**< Log related to mbuf. */
+#define RTE_LOGTYPE_CRYPTODEV 0x00020000 /**< Log related to cryptodev. */
+
+/* these log types can be used in an application */
+#define RTE_LOGTYPE_USER1 0x01000000 /**< User-defined log type 1. */
+#define RTE_LOGTYPE_USER2 0x02000000 /**< User-defined log type 2. */
+#define RTE_LOGTYPE_USER3 0x04000000 /**< User-defined log type 3. */
+#define RTE_LOGTYPE_USER4 0x08000000 /**< User-defined log type 4. */
+#define RTE_LOGTYPE_USER5 0x10000000 /**< User-defined log type 5. */
+#define RTE_LOGTYPE_USER6 0x20000000 /**< User-defined log type 6. */
+#define RTE_LOGTYPE_USER7 0x40000000 /**< User-defined log type 7. */
+#define RTE_LOGTYPE_USER8 0x80000000 /**< User-defined log type 8. */
+
+/* Can't use 0, as it gives compiler warnings */
+#define RTE_LOG_EMERG 1U /**< System is unusable. */
+#define RTE_LOG_ALERT 2U /**< Action must be taken immediately. */
+#define RTE_LOG_CRIT 3U /**< Critical conditions. */
+#define RTE_LOG_ERR 4U /**< Error conditions. */
+#define RTE_LOG_WARNING 5U /**< Warning conditions. */
+#define RTE_LOG_NOTICE 6U /**< Normal but significant condition. */
+#define RTE_LOG_INFO 7U /**< Informational. */
+#define RTE_LOG_DEBUG 8U /**< Debug-level messages. */
+
+/** The default log stream. */
+extern FILE *eal_default_log_stream;
+
+/**
+ * Change the stream that will be used by the logging system.
+ *
+ * This can be done at any time. The f argument represents the stream
+ * to be used to send the logs. If f is NULL, the default output is
+ * used (stderr).
+ *
+ * @param f
+ * Pointer to the stream.
+ * @return
+ * - 0 on success.
+ * - Negative on error.
+ */
+int rte_openlog_stream(FILE *f);
+
+/**
+ * Set the global log level.
+ *
+ * After this call, all logs that are lower or equal than level and
+ * lower or equal than the RTE_LOG_LEVEL configuration option will be
+ * displayed.
+ *
+ * @param level
+ * Log level. A value between RTE_LOG_EMERG (1) and RTE_LOG_DEBUG (8).
+ */
+void rte_set_log_level(uint32_t level);
+
+/**
+ * Get the global log level.
+ */
+uint32_t rte_get_log_level(void);
+
+/**
+ * Enable or disable the log type.
+ *
+ * @param type
+ * Log type, for example, RTE_LOGTYPE_EAL.
+ * @param enable
+ * True for enable; false for disable.
+ */
+void rte_set_log_type(uint32_t type, int enable);
+
+/**
+ * Get the global log type.
+ */
+uint32_t rte_get_log_type(void);
+
+/**
+ * Get the current loglevel for the message being processed.
+ *
+ * Before calling the user-defined stream for logging, the log
+ * subsystem sets a per-lcore variable containing the loglevel and the
+ * logtype of the message being processed. This information can be
+ * accessed by the user-defined log output function through this
+ * function.
+ *
+ * @return
+ * The loglevel of the message being processed.
+ */
+int rte_log_cur_msg_loglevel(void);
+
+/**
+ * Get the current logtype for the message being processed.
+ *
+ * Before calling the user-defined stream for logging, the log
+ * subsystem sets a per-lcore variable containing the loglevel and the
+ * logtype of the message being processed. This information can be
+ * accessed by the user-defined log output function through this
+ * function.
+ *
+ * @return
+ * The logtype of the message being processed.
+ */
+int rte_log_cur_msg_logtype(void);
+
+/**
+ * Enable or disable the history (enabled by default)
+ *
+ * @param enable
+ * true to enable, or 0 to disable history.
+ */
+void rte_log_set_history(int enable);
+
+/**
+ * Dump the log history to a file
+ *
+ * @param f
+ * A pointer to a file for output
+ */
+void rte_log_dump_history(FILE *f);
+
+/**
+ * Add a log message to the history.
+ *
+ * This function can be called from a user-defined log stream. It adds
+ * the given message in the history that can be dumped using
+ * rte_log_dump_history().
+ *
+ * @param buf
+ * A data buffer containing the message to be saved in the history.
+ * @param size
+ * The length of the data buffer.
+ * @return
+ * - 0: Success.
+ * - (-ENOBUFS) if there is no room to store the message.
+ */
+int rte_log_add_in_history(const char *buf, size_t size);
+
+/**
+ * Generates a log message.
+ *
+ * The message will be sent in the stream defined by the previous call
+ * to rte_openlog_stream().
+ *
+ * The level argument determines if the log should be displayed or
+ * not, depending on the global rte_logs variable.
+ *
+ * The preferred alternative is the RTE_LOG() function because debug logs may
+ * be removed at compilation time if optimization is enabled. Moreover,
+ * logs are automatically prefixed by type when using the macro.
+ *
+ * @param level
+ * Log level. A value between RTE_LOG_EMERG (1) and RTE_LOG_DEBUG (8).
+ * @param logtype
+ * The log type, for example, RTE_LOGTYPE_EAL.
+ * @param format
+ * The format string, as in printf(3), followed by the variable arguments
+ * required by the format.
+ * @return
+ * - 0: Success.
+ * - Negative on error.
+ */
+int rte_log(uint32_t level, uint32_t logtype, const char *format, ...)
+#ifdef __GNUC__
+#if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 2))
+ __attribute__((cold))
+#endif
+#endif
+ __attribute__((format(printf, 3, 4)));
+
+/**
+ * Generates a log message.
+ *
+ * The message will be sent in the stream defined by the previous call
+ * to rte_openlog_stream().
+ *
+ * The level argument determines if the log should be displayed or
+ * not, depending on the global rte_logs variable. A trailing
+ * newline may be added if needed.
+ *
+ * The preferred alternative is the RTE_LOG() because debug logs may be
+ * removed at compilation time.
+ *
+ * @param level
+ * Log level. A value between RTE_LOG_EMERG (1) and RTE_LOG_DEBUG (8).
+ * @param logtype
+ * The log type, for example, RTE_LOGTYPE_EAL.
+ * @param format
+ * The format string, as in printf(3), followed by the variable arguments
+ * required by the format.
+ * @param ap
+ * The va_list of the variable arguments required by the format.
+ * @return
+ * - 0: Success.
+ * - Negative on error.
+ */
+int rte_vlog(uint32_t level, uint32_t logtype, const char *format, va_list ap)
+ __attribute__((format(printf,3,0)));
+
+/**
+ * Generates a log message.
+ *
+ * The RTE_LOG() is equivalent to rte_log() with two differences:
+
+ * - RTE_LOG() can be used to remove debug logs at compilation time,
+ * depending on RTE_LOG_LEVEL configuration option, and compilation
+ * optimization level. If optimization is enabled, the tests
+ * involving constants only are pre-computed. If compilation is done
+ * with -O0, these tests will be done at run time.
+ * - The log level and log type names are smaller, for example:
+ * RTE_LOG(INFO, EAL, "this is a %s", "log");
+ *
+ * @param l
+ * Log level. A value between EMERG (1) and DEBUG (8). The short name is
+ * expanded by the macro, so it cannot be an integer value.
+ * @param t
+ * The log type, for example, EAL. The short name is expanded by the
+ * macro, so it cannot be an integer value.
+ * @param ...
+ * The fmt string, as in printf(3), followed by the variable arguments
+ * required by the format.
+ * @return
+ * - 0: Success.
+ * - Negative on error.
+ */
+#define RTE_LOG(l, t, ...) \
+ (void)((RTE_LOG_ ## l <= RTE_LOG_LEVEL) ? \
+ rte_log(RTE_LOG_ ## l, \
+ RTE_LOGTYPE_ ## t, # t ": " __VA_ARGS__) : \
+ 0)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_LOG_H_ */
diff --git a/lib/librte_eal/common/include/rte_malloc.h b/lib/librte_eal/common/include/rte_malloc.h
new file mode 100644
index 00000000..74bb78c7
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_malloc.h
@@ -0,0 +1,342 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_MALLOC_H_
+#define _RTE_MALLOC_H_
+
+/**
+ * @file
+ * RTE Malloc. This library provides methods for dynamically allocating memory
+ * from hugepages.
+ */
+
+#include <stdio.h>
+#include <stddef.h>
+#include <rte_memory.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Structure to hold heap statistics obtained from rte_malloc_get_socket_stats function.
+ */
+struct rte_malloc_socket_stats {
+ size_t heap_totalsz_bytes; /**< Total bytes on heap */
+ size_t heap_freesz_bytes; /**< Total free bytes on heap */
+ size_t greatest_free_size; /**< Size in bytes of largest free block */
+ unsigned free_count; /**< Number of free elements on heap */
+ unsigned alloc_count; /**< Number of allocated elements on heap */
+ size_t heap_allocsz_bytes; /**< Total allocated bytes on heap */
+};
+
+/**
+ * This function allocates memory from the huge-page area of memory. The memory
+ * is not cleared. In NUMA systems, the memory allocated resides on the same
+ * NUMA socket as the core that calls this function.
+ *
+ * @param type
+ * A string identifying the type of allocated objects (useful for debug
+ * purposes, such as identifying the cause of a memory leak). Can be NULL.
+ * @param size
+ * Size (in bytes) to be allocated.
+ * @param align
+ * If 0, the return is a pointer that is suitably aligned for any kind of
+ * variable (in the same manner as malloc()).
+ * Otherwise, the return is a pointer that is a multiple of *align*. In
+ * this case, it must be a power of two. (Minimum alignment is the
+ * cacheline size, i.e. 64-bytes)
+ * @return
+ * - NULL on error. Not enough memory, or invalid arguments (size is 0,
+ * align is not a power of two).
+ * - Otherwise, the pointer to the allocated object.
+ */
+void *
+rte_malloc(const char *type, size_t size, unsigned align);
+
+/**
+ * Allocate zero'ed memory from the heap.
+ *
+ * Equivalent to rte_malloc() except that the memory zone is
+ * initialised with zeros. In NUMA systems, the memory allocated resides on the
+ * same NUMA socket as the core that calls this function.
+ *
+ * @param type
+ * A string identifying the type of allocated objects (useful for debug
+ * purposes, such as identifying the cause of a memory leak). Can be NULL.
+ * @param size
+ * Size (in bytes) to be allocated.
+ * @param align
+ * If 0, the return is a pointer that is suitably aligned for any kind of
+ * variable (in the same manner as malloc()).
+ * Otherwise, the return is a pointer that is a multiple of *align*. In
+ * this case, it must obviously be a power of two. (Minimum alignment is the
+ * cacheline size, i.e. 64-bytes)
+ * @return
+ * - NULL on error. Not enough memory, or invalid arguments (size is 0,
+ * align is not a power of two).
+ * - Otherwise, the pointer to the allocated object.
+ */
+void *
+rte_zmalloc(const char *type, size_t size, unsigned align);
+
+/**
+ * Replacement function for calloc(), using huge-page memory. Memory area is
+ * initialised with zeros. In NUMA systems, the memory allocated resides on the
+ * same NUMA socket as the core that calls this function.
+ *
+ * @param type
+ * A string identifying the type of allocated objects (useful for debug
+ * purposes, such as identifying the cause of a memory leak). Can be NULL.
+ * @param num
+ * Number of elements to be allocated.
+ * @param size
+ * Size (in bytes) of a single element.
+ * @param align
+ * If 0, the return is a pointer that is suitably aligned for any kind of
+ * variable (in the same manner as malloc()).
+ * Otherwise, the return is a pointer that is a multiple of *align*. In
+ * this case, it must obviously be a power of two. (Minimum alignment is the
+ * cacheline size, i.e. 64-bytes)
+ * @return
+ * - NULL on error. Not enough memory, or invalid arguments (size is 0,
+ * align is not a power of two).
+ * - Otherwise, the pointer to the allocated object.
+ */
+void *
+rte_calloc(const char *type, size_t num, size_t size, unsigned align);
+
+/**
+ * Replacement function for realloc(), using huge-page memory. Reserved area
+ * memory is resized, preserving contents. In NUMA systems, the new area
+ * resides on the same NUMA socket as the old area.
+ *
+ * @param ptr
+ * Pointer to already allocated memory
+ * @param size
+ * Size (in bytes) of new area. If this is 0, memory is freed.
+ * @param align
+ * If 0, the return is a pointer that is suitably aligned for any kind of
+ * variable (in the same manner as malloc()).
+ * Otherwise, the return is a pointer that is a multiple of *align*. In
+ * this case, it must obviously be a power of two. (Minimum alignment is the
+ * cacheline size, i.e. 64-bytes)
+ * @return
+ * - NULL on error. Not enough memory, or invalid arguments (size is 0,
+ * align is not a power of two).
+ * - Otherwise, the pointer to the reallocated memory.
+ */
+void *
+rte_realloc(void *ptr, size_t size, unsigned align);
+
+/**
+ * This function allocates memory from the huge-page area of memory. The memory
+ * is not cleared.
+ *
+ * @param type
+ * A string identifying the type of allocated objects (useful for debug
+ * purposes, such as identifying the cause of a memory leak). Can be NULL.
+ * @param size
+ * Size (in bytes) to be allocated.
+ * @param align
+ * If 0, the return is a pointer that is suitably aligned for any kind of
+ * variable (in the same manner as malloc()).
+ * Otherwise, the return is a pointer that is a multiple of *align*. In
+ * this case, it must be a power of two. (Minimum alignment is the
+ * cacheline size, i.e. 64-bytes)
+ * @param socket
+ * NUMA socket to allocate memory on. If SOCKET_ID_ANY is used, this function
+ * will behave the same as rte_malloc().
+ * @return
+ * - NULL on error. Not enough memory, or invalid arguments (size is 0,
+ * align is not a power of two).
+ * - Otherwise, the pointer to the allocated object.
+ */
+void *
+rte_malloc_socket(const char *type, size_t size, unsigned align, int socket);
+
+/**
+ * Allocate zero'ed memory from the heap.
+ *
+ * Equivalent to rte_malloc() except that the memory zone is
+ * initialised with zeros.
+ *
+ * @param type
+ * A string identifying the type of allocated objects (useful for debug
+ * purposes, such as identifying the cause of a memory leak). Can be NULL.
+ * @param size
+ * Size (in bytes) to be allocated.
+ * @param align
+ * If 0, the return is a pointer that is suitably aligned for any kind of
+ * variable (in the same manner as malloc()).
+ * Otherwise, the return is a pointer that is a multiple of *align*. In
+ * this case, it must obviously be a power of two. (Minimum alignment is the
+ * cacheline size, i.e. 64-bytes)
+ * @param socket
+ * NUMA socket to allocate memory on. If SOCKET_ID_ANY is used, this function
+ * will behave the same as rte_zmalloc().
+ * @return
+ * - NULL on error. Not enough memory, or invalid arguments (size is 0,
+ * align is not a power of two).
+ * - Otherwise, the pointer to the allocated object.
+ */
+void *
+rte_zmalloc_socket(const char *type, size_t size, unsigned align, int socket);
+
+/**
+ * Replacement function for calloc(), using huge-page memory. Memory area is
+ * initialised with zeros.
+ *
+ * @param type
+ * A string identifying the type of allocated objects (useful for debug
+ * purposes, such as identifying the cause of a memory leak). Can be NULL.
+ * @param num
+ * Number of elements to be allocated.
+ * @param size
+ * Size (in bytes) of a single element.
+ * @param align
+ * If 0, the return is a pointer that is suitably aligned for any kind of
+ * variable (in the same manner as malloc()).
+ * Otherwise, the return is a pointer that is a multiple of *align*. In
+ * this case, it must obviously be a power of two. (Minimum alignment is the
+ * cacheline size, i.e. 64-bytes)
+ * @param socket
+ * NUMA socket to allocate memory on. If SOCKET_ID_ANY is used, this function
+ * will behave the same as rte_calloc().
+ * @return
+ * - NULL on error. Not enough memory, or invalid arguments (size is 0,
+ * align is not a power of two).
+ * - Otherwise, the pointer to the allocated object.
+ */
+void *
+rte_calloc_socket(const char *type, size_t num, size_t size, unsigned align, int socket);
+
+/**
+ * Frees the memory space pointed to by the provided pointer.
+ *
+ * This pointer must have been returned by a previous call to
+ * rte_malloc(), rte_zmalloc(), rte_calloc() or rte_realloc(). The behaviour of
+ * rte_free() is undefined if the pointer does not match this requirement.
+ *
+ * If the pointer is NULL, the function does nothing.
+ *
+ * @param ptr
+ * The pointer to memory to be freed.
+ */
+void
+rte_free(void *ptr);
+
+/**
+ * If malloc debug is enabled, check a memory block for header
+ * and trailer markers to indicate that all is well with the block.
+ * If size is non-null, also return the size of the block.
+ *
+ * @param ptr
+ * pointer to the start of a data block, must have been returned
+ * by a previous call to rte_malloc(), rte_zmalloc(), rte_calloc()
+ * or rte_realloc()
+ * @param size
+ * if non-null, and memory block pointer is valid, returns the size
+ * of the memory block
+ * @return
+ * -1 on error, invalid pointer passed or header and trailer markers
+ * are missing or corrupted
+ * 0 on success
+ */
+int
+rte_malloc_validate(const void *ptr, size_t *size);
+
+/**
+ * Get heap statistics for the specified heap.
+ *
+ * @param socket
+ * An unsigned integer specifying the socket to get heap statistics for
+ * @param socket_stats
+ * A structure which provides memory to store statistics
+ * @return
+ * Null on error
+ * Pointer to structure storing statistics on success
+ */
+int
+rte_malloc_get_socket_stats(int socket,
+ struct rte_malloc_socket_stats *socket_stats);
+
+/**
+ * Dump statistics.
+ *
+ * Dump for the specified type to the console. If the type argument is
+ * NULL, all memory types will be dumped.
+ *
+ * @param f
+ * A pointer to a file for output
+ * @param type
+ * A string identifying the type of objects to dump, or NULL
+ * to dump all objects.
+ */
+void
+rte_malloc_dump_stats(FILE *f, const char *type);
+
+/**
+ * Set the maximum amount of allocated memory for this type.
+ *
+ * This is not yet implemented
+ *
+ * @param type
+ * A string identifying the type of allocated objects.
+ * @param max
+ * The maximum amount of allocated bytes for this type.
+ * @return
+ * - 0: Success.
+ * - (-1): Error.
+ */
+int
+rte_malloc_set_limit(const char *type, size_t max);
+
+/**
+ * Return the physical address of a virtual address obtained through
+ * rte_malloc
+ *
+ * @param addr
+ * Adress obtained from a previous rte_malloc call
+ * @return
+ * NULL on error
+ * otherwise return physical address of the buffer
+ */
+phys_addr_t
+rte_malloc_virt2phy(const void *addr);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MALLOC_H_ */
diff --git a/lib/librte_eal/common/include/rte_malloc_heap.h b/lib/librte_eal/common/include/rte_malloc_heap.h
new file mode 100644
index 00000000..b2703562
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_malloc_heap.h
@@ -0,0 +1,55 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_MALLOC_HEAP_H_
+#define _RTE_MALLOC_HEAP_H_
+
+#include <stddef.h>
+#include <sys/queue.h>
+#include <rte_spinlock.h>
+#include <rte_memory.h>
+
+/* Number of free lists per heap, grouped by size. */
+#define RTE_HEAP_NUM_FREELISTS 13
+
+/**
+ * Structure to hold malloc heap
+ */
+struct malloc_heap {
+ rte_spinlock_t lock;
+ LIST_HEAD(, malloc_elem) free_head[RTE_HEAP_NUM_FREELISTS];
+ unsigned alloc_count;
+ size_t total_size;
+} __rte_cache_aligned;
+
+#endif /* _RTE_MALLOC_HEAP_H_ */
diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h
new file mode 100644
index 00000000..f8dbece0
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_memory.h
@@ -0,0 +1,263 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_MEMORY_H_
+#define _RTE_MEMORY_H_
+
+/**
+ * @file
+ *
+ * Memory-related RTE API.
+ */
+
+#include <stdint.h>
+#include <stddef.h>
+#include <stdio.h>
+
+#ifdef RTE_EXEC_ENV_LINUXAPP
+#include <exec-env/rte_dom0_common.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_common.h>
+
+enum rte_page_sizes {
+ RTE_PGSIZE_4K = 1ULL << 12,
+ RTE_PGSIZE_64K = 1ULL << 16,
+ RTE_PGSIZE_256K = 1ULL << 18,
+ RTE_PGSIZE_2M = 1ULL << 21,
+ RTE_PGSIZE_16M = 1ULL << 24,
+ RTE_PGSIZE_256M = 1ULL << 28,
+ RTE_PGSIZE_512M = 1ULL << 29,
+ RTE_PGSIZE_1G = 1ULL << 30,
+ RTE_PGSIZE_4G = 1ULL << 32,
+ RTE_PGSIZE_16G = 1ULL << 34,
+};
+
+#define SOCKET_ID_ANY -1 /**< Any NUMA socket. */
+#define RTE_CACHE_LINE_MASK (RTE_CACHE_LINE_SIZE-1) /**< Cache line mask. */
+
+#define RTE_CACHE_LINE_ROUNDUP(size) \
+ (RTE_CACHE_LINE_SIZE * ((size + RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE))
+/**< Return the first cache-aligned value greater or equal to size. */
+
+/**< Cache line size in terms of log2 */
+#if RTE_CACHE_LINE_SIZE == 64
+#define RTE_CACHE_LINE_SIZE_LOG2 6
+#elif RTE_CACHE_LINE_SIZE == 128
+#define RTE_CACHE_LINE_SIZE_LOG2 7
+#else
+#error "Unsupported cache line size"
+#endif
+
+#define RTE_CACHE_LINE_MIN_SIZE 64 /**< Minimum Cache line size. */
+
+/**
+ * Force alignment to cache line.
+ */
+#define __rte_cache_aligned __rte_aligned(RTE_CACHE_LINE_SIZE)
+
+/**
+ * Force minimum cache line alignment.
+ */
+#define __rte_cache_min_aligned __rte_aligned(RTE_CACHE_LINE_MIN_SIZE)
+
+typedef uint64_t phys_addr_t; /**< Physical address definition. */
+#define RTE_BAD_PHYS_ADDR ((phys_addr_t)-1)
+
+/**
+ * Physical memory segment descriptor.
+ */
+struct rte_memseg {
+ phys_addr_t phys_addr; /**< Start physical address. */
+ union {
+ void *addr; /**< Start virtual address. */
+ uint64_t addr_64; /**< Makes sure addr is always 64 bits */
+ };
+#ifdef RTE_LIBRTE_IVSHMEM
+ phys_addr_t ioremap_addr; /**< Real physical address inside the VM */
+#endif
+ size_t len; /**< Length of the segment. */
+ uint64_t hugepage_sz; /**< The pagesize of underlying memory */
+ int32_t socket_id; /**< NUMA socket ID. */
+ uint32_t nchannel; /**< Number of channels. */
+ uint32_t nrank; /**< Number of ranks. */
+#ifdef RTE_LIBRTE_XEN_DOM0
+ /**< store segment MFNs */
+ uint64_t mfn[DOM0_NUM_MEMBLOCK];
+#endif
+} __rte_packed;
+
+/**
+ * Lock page in physical memory and prevent from swapping.
+ *
+ * @param virt
+ * The virtual address.
+ * @return
+ * 0 on success, negative on error.
+ */
+int rte_mem_lock_page(const void *virt);
+
+/**
+ * Get physical address of any mapped virtual address in the current process.
+ * It is found by browsing the /proc/self/pagemap special file.
+ * The page must be locked.
+ *
+ * @param virt
+ * The virtual address.
+ * @return
+ * The physical address or RTE_BAD_PHYS_ADDR on error.
+ */
+phys_addr_t rte_mem_virt2phy(const void *virt);
+
+/**
+ * Get the layout of the available physical memory.
+ *
+ * It can be useful for an application to have the full physical
+ * memory layout to decide the size of a memory zone to reserve. This
+ * table is stored in rte_config (see rte_eal_get_configuration()).
+ *
+ * @return
+ * - On success, return a pointer to a read-only table of struct
+ * rte_physmem_desc elements, containing the layout of all
+ * addressable physical memory. The last element of the table
+ * contains a NULL address.
+ * - On error, return NULL. This should not happen since it is a fatal
+ * error that will probably cause the entire system to panic.
+ */
+const struct rte_memseg *rte_eal_get_physmem_layout(void);
+
+/**
+ * Dump the physical memory layout to the console.
+ *
+ * @param f
+ * A pointer to a file for output
+ */
+void rte_dump_physmem_layout(FILE *f);
+
+/**
+ * Get the total amount of available physical memory.
+ *
+ * @return
+ * The total amount of available physical memory in bytes.
+ */
+uint64_t rte_eal_get_physmem_size(void);
+
+/**
+ * Get the number of memory channels.
+ *
+ * @return
+ * The number of memory channels on the system. The value is 0 if unknown
+ * or not the same on all devices.
+ */
+unsigned rte_memory_get_nchannel(void);
+
+/**
+ * Get the number of memory ranks.
+ *
+ * @return
+ * The number of memory ranks on the system. The value is 0 if unknown or
+ * not the same on all devices.
+ */
+unsigned rte_memory_get_nrank(void);
+
+#ifdef RTE_LIBRTE_XEN_DOM0
+
+/**< Internal use only - should DOM0 memory mapping be used */
+int rte_xen_dom0_supported(void);
+
+/**< Internal use only - phys to virt mapping for xen */
+phys_addr_t rte_xen_mem_phy2mch(uint32_t, const phys_addr_t);
+
+/**
+ * Return the physical address of elt, which is an element of the pool mp.
+ *
+ * @param memseg_id
+ * The mempool is from which memory segment.
+ * @param phy_addr
+ * physical address of elt.
+ *
+ * @return
+ * The physical address or error.
+ */
+static inline phys_addr_t
+rte_mem_phy2mch(uint32_t memseg_id, const phys_addr_t phy_addr)
+{
+ if (rte_xen_dom0_supported())
+ return rte_xen_mem_phy2mch(memseg_id, phy_addr);
+ else
+ return phy_addr;
+}
+
+/**
+ * Memory init for supporting application running on Xen domain0.
+ *
+ * @param void
+ *
+ * @return
+ * 0: successfully
+ * negative: error
+ */
+int rte_xen_dom0_memory_init(void);
+
+/**
+ * Attach to memory setments of primary process on Xen domain0.
+ *
+ * @param void
+ *
+ * @return
+ * 0: successfully
+ * negative: error
+ */
+int rte_xen_dom0_memory_attach(void);
+#else
+static inline int rte_xen_dom0_supported(void)
+{
+ return 0;
+}
+
+static inline phys_addr_t
+rte_mem_phy2mch(uint32_t memseg_id __rte_unused, const phys_addr_t phy_addr)
+{
+ return phy_addr;
+}
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MEMORY_H_ */
diff --git a/lib/librte_eal/common/include/rte_memzone.h b/lib/librte_eal/common/include/rte_memzone.h
new file mode 100644
index 00000000..f69b5a87
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_memzone.h
@@ -0,0 +1,305 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_MEMZONE_H_
+#define _RTE_MEMZONE_H_
+
+/**
+ * @file
+ * RTE Memzone
+ *
+ * The goal of the memzone allocator is to reserve contiguous
+ * portions of physical memory. These zones are identified by a name.
+ *
+ * The memzone descriptors are shared by all partitions and are
+ * located in a known place of physical memory. This zone is accessed
+ * using rte_eal_get_configuration(). The lookup (by name) of a
+ * memory zone can be done in any partition and returns the same
+ * physical address.
+ *
+ * A reserved memory zone cannot be unreserved. The reservation shall
+ * be done at initialization time only.
+ */
+
+#include <stdio.h>
+#include <rte_memory.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define RTE_MEMZONE_2MB 0x00000001 /**< Use 2MB pages. */
+#define RTE_MEMZONE_1GB 0x00000002 /**< Use 1GB pages. */
+#define RTE_MEMZONE_16MB 0x00000100 /**< Use 16MB pages. */
+#define RTE_MEMZONE_16GB 0x00000200 /**< Use 16GB pages. */
+#define RTE_MEMZONE_256KB 0x00010000 /**< Use 256KB pages. */
+#define RTE_MEMZONE_256MB 0x00020000 /**< Use 256MB pages. */
+#define RTE_MEMZONE_512MB 0x00040000 /**< Use 512MB pages. */
+#define RTE_MEMZONE_4GB 0x00080000 /**< Use 4GB pages. */
+#define RTE_MEMZONE_SIZE_HINT_ONLY 0x00000004 /**< Use available page size */
+
+/**
+ * A structure describing a memzone, which is a contiguous portion of
+ * physical memory identified by a name.
+ */
+struct rte_memzone {
+
+#define RTE_MEMZONE_NAMESIZE 32 /**< Maximum length of memory zone name.*/
+ char name[RTE_MEMZONE_NAMESIZE]; /**< Name of the memory zone. */
+
+ phys_addr_t phys_addr; /**< Start physical address. */
+ union {
+ void *addr; /**< Start virtual address. */
+ uint64_t addr_64; /**< Makes sure addr is always 64-bits */
+ };
+#ifdef RTE_LIBRTE_IVSHMEM
+ phys_addr_t ioremap_addr; /**< Real physical address inside the VM */
+#endif
+ size_t len; /**< Length of the memzone. */
+
+ uint64_t hugepage_sz; /**< The page size of underlying memory */
+
+ int32_t socket_id; /**< NUMA socket ID. */
+
+ uint32_t flags; /**< Characteristics of this memzone. */
+ uint32_t memseg_id; /**< Memseg it belongs. */
+} __attribute__((__packed__));
+
+/**
+ * Reserve a portion of physical memory.
+ *
+ * This function reserves some memory and returns a pointer to a
+ * correctly filled memzone descriptor. If the allocation cannot be
+ * done, return NULL.
+ *
+ * @param name
+ * The name of the memzone. If it already exists, the function will
+ * fail and return NULL.
+ * @param len
+ * The size of the memory to be reserved. If it
+ * is 0, the biggest contiguous zone will be reserved.
+ * @param socket_id
+ * The socket identifier in the case of
+ * NUMA. The value can be SOCKET_ID_ANY if there is no NUMA
+ * constraint for the reserved zone.
+ * @param flags
+ * The flags parameter is used to request memzones to be
+ * taken from specifically sized hugepages.
+ * - RTE_MEMZONE_2MB - Reserved from 2MB pages
+ * - RTE_MEMZONE_1GB - Reserved from 1GB pages
+ * - RTE_MEMZONE_16MB - Reserved from 16MB pages
+ * - RTE_MEMZONE_16GB - Reserved from 16GB pages
+ * - RTE_MEMZONE_256KB - Reserved from 256KB pages
+ * - RTE_MEMZONE_256MB - Reserved from 256MB pages
+ * - RTE_MEMZONE_512MB - Reserved from 512MB pages
+ * - RTE_MEMZONE_4GB - Reserved from 4GB pages
+ * - RTE_MEMZONE_SIZE_HINT_ONLY - Allow alternative page size to be used if
+ * the requested page size is unavailable.
+ * If this flag is not set, the function
+ * will return error on an unavailable size
+ * request.
+ * @return
+ * A pointer to a correctly-filled read-only memzone descriptor, or NULL
+ * on error.
+ * On error case, rte_errno will be set appropriately:
+ * - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure
+ * - E_RTE_SECONDARY - function was called from a secondary process instance
+ * - ENOSPC - the maximum number of memzones has already been allocated
+ * - EEXIST - a memzone with the same name already exists
+ * - ENOMEM - no appropriate memory area found in which to create memzone
+ * - EINVAL - invalid parameters
+ */
+const struct rte_memzone *rte_memzone_reserve(const char *name,
+ size_t len, int socket_id,
+ unsigned flags);
+
+/**
+ * Reserve a portion of physical memory with alignment on a specified
+ * boundary.
+ *
+ * This function reserves some memory with alignment on a specified
+ * boundary, and returns a pointer to a correctly filled memzone
+ * descriptor. If the allocation cannot be done or if the alignment
+ * is not a power of 2, returns NULL.
+ *
+ * @param name
+ * The name of the memzone. If it already exists, the function will
+ * fail and return NULL.
+ * @param len
+ * The size of the memory to be reserved. If it
+ * is 0, the biggest contiguous zone will be reserved.
+ * @param socket_id
+ * The socket identifier in the case of
+ * NUMA. The value can be SOCKET_ID_ANY if there is no NUMA
+ * constraint for the reserved zone.
+ * @param flags
+ * The flags parameter is used to request memzones to be
+ * taken from specifically sized hugepages.
+ * - RTE_MEMZONE_2MB - Reserved from 2MB pages
+ * - RTE_MEMZONE_1GB - Reserved from 1GB pages
+ * - RTE_MEMZONE_16MB - Reserved from 16MB pages
+ * - RTE_MEMZONE_16GB - Reserved from 16GB pages
+ * - RTE_MEMZONE_256KB - Reserved from 256KB pages
+ * - RTE_MEMZONE_256MB - Reserved from 256MB pages
+ * - RTE_MEMZONE_512MB - Reserved from 512MB pages
+ * - RTE_MEMZONE_4GB - Reserved from 4GB pages
+ * - RTE_MEMZONE_SIZE_HINT_ONLY - Allow alternative page size to be used if
+ * the requested page size is unavailable.
+ * If this flag is not set, the function
+ * will return error on an unavailable size
+ * request.
+ * @param align
+ * Alignment for resulting memzone. Must be a power of 2.
+ * @return
+ * A pointer to a correctly-filled read-only memzone descriptor, or NULL
+ * on error.
+ * On error case, rte_errno will be set appropriately:
+ * - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure
+ * - E_RTE_SECONDARY - function was called from a secondary process instance
+ * - ENOSPC - the maximum number of memzones has already been allocated
+ * - EEXIST - a memzone with the same name already exists
+ * - ENOMEM - no appropriate memory area found in which to create memzone
+ * - EINVAL - invalid parameters
+ */
+const struct rte_memzone *rte_memzone_reserve_aligned(const char *name,
+ size_t len, int socket_id,
+ unsigned flags, unsigned align);
+
+/**
+ * Reserve a portion of physical memory with specified alignment and
+ * boundary.
+ *
+ * This function reserves some memory with specified alignment and
+ * boundary, and returns a pointer to a correctly filled memzone
+ * descriptor. If the allocation cannot be done or if the alignment
+ * or boundary are not a power of 2, returns NULL.
+ * Memory buffer is reserved in a way, that it wouldn't cross specified
+ * boundary. That implies that requested length should be less or equal
+ * then boundary.
+ *
+ * @param name
+ * The name of the memzone. If it already exists, the function will
+ * fail and return NULL.
+ * @param len
+ * The size of the memory to be reserved. If it
+ * is 0, the biggest contiguous zone will be reserved.
+ * @param socket_id
+ * The socket identifier in the case of
+ * NUMA. The value can be SOCKET_ID_ANY if there is no NUMA
+ * constraint for the reserved zone.
+ * @param flags
+ * The flags parameter is used to request memzones to be
+ * taken from specifically sized hugepages.
+ * - RTE_MEMZONE_2MB - Reserved from 2MB pages
+ * - RTE_MEMZONE_1GB - Reserved from 1GB pages
+ * - RTE_MEMZONE_16MB - Reserved from 16MB pages
+ * - RTE_MEMZONE_16GB - Reserved from 16GB pages
+ * - RTE_MEMZONE_256KB - Reserved from 256KB pages
+ * - RTE_MEMZONE_256MB - Reserved from 256MB pages
+ * - RTE_MEMZONE_512MB - Reserved from 512MB pages
+ * - RTE_MEMZONE_4GB - Reserved from 4GB pages
+ * - RTE_MEMZONE_SIZE_HINT_ONLY - Allow alternative page size to be used if
+ * the requested page size is unavailable.
+ * If this flag is not set, the function
+ * will return error on an unavailable size
+ * request.
+ * @param align
+ * Alignment for resulting memzone. Must be a power of 2.
+ * @param bound
+ * Boundary for resulting memzone. Must be a power of 2 or zero.
+ * Zero value implies no boundary condition.
+ * @return
+ * A pointer to a correctly-filled read-only memzone descriptor, or NULL
+ * on error.
+ * On error case, rte_errno will be set appropriately:
+ * - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure
+ * - E_RTE_SECONDARY - function was called from a secondary process instance
+ * - ENOSPC - the maximum number of memzones has already been allocated
+ * - EEXIST - a memzone with the same name already exists
+ * - ENOMEM - no appropriate memory area found in which to create memzone
+ * - EINVAL - invalid parameters
+ */
+const struct rte_memzone *rte_memzone_reserve_bounded(const char *name,
+ size_t len, int socket_id,
+ unsigned flags, unsigned align, unsigned bound);
+
+/**
+ * Free a memzone.
+ *
+ * Note: an IVSHMEM zone cannot be freed.
+ *
+ * @param mz
+ * A pointer to the memzone
+ * @return
+ * -EINVAL - invalid parameter, IVSHMEM memzone.
+ * 0 - success
+ */
+int rte_memzone_free(const struct rte_memzone *mz);
+
+/**
+ * Lookup for a memzone.
+ *
+ * Get a pointer to a descriptor of an already reserved memory
+ * zone identified by the name given as an argument.
+ *
+ * @param name
+ * The name of the memzone.
+ * @return
+ * A pointer to a read-only memzone descriptor.
+ */
+const struct rte_memzone *rte_memzone_lookup(const char *name);
+
+/**
+ * Dump all reserved memzones to the console.
+ *
+ * @param f
+ * A pointer to a file for output
+ */
+void rte_memzone_dump(FILE *f);
+
+/**
+ * Walk list of all memzones
+ *
+ * @param func
+ * Iterator function
+ * @param arg
+ * Argument passed to iterator
+ */
+void rte_memzone_walk(void (*func)(const struct rte_memzone *, void *arg),
+ void *arg);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MEMZONE_H_ */
diff --git a/lib/librte_eal/common/include/rte_pci.h b/lib/librte_eal/common/include/rte_pci.h
new file mode 100644
index 00000000..e692094e
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_pci.h
@@ -0,0 +1,598 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+/* BSD LICENSE
+ *
+ * Copyright 2013-2014 6WIND S.A.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_PCI_H_
+#define _RTE_PCI_H_
+
+/**
+ * @file
+ *
+ * RTE PCI Interface
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <errno.h>
+#include <sys/queue.h>
+#include <stdint.h>
+#include <inttypes.h>
+
+#include <rte_interrupts.h>
+
+TAILQ_HEAD(pci_device_list, rte_pci_device); /**< PCI devices in D-linked Q. */
+TAILQ_HEAD(pci_driver_list, rte_pci_driver); /**< PCI drivers in D-linked Q. */
+
+extern struct pci_driver_list pci_driver_list; /**< Global list of PCI drivers. */
+extern struct pci_device_list pci_device_list; /**< Global list of PCI devices. */
+
+/** Pathname of PCI devices directory. */
+#define SYSFS_PCI_DEVICES "/sys/bus/pci/devices"
+
+/** Formatting string for PCI device identifier: Ex: 0000:00:01.0 */
+#define PCI_PRI_FMT "%.4" PRIx16 ":%.2" PRIx8 ":%.2" PRIx8 ".%" PRIx8
+
+/** Short formatting string, without domain, for PCI device: Ex: 00:01.0 */
+#define PCI_SHORT_PRI_FMT "%.2" PRIx8 ":%.2" PRIx8 ".%" PRIx8
+
+/** Nb. of values in PCI device identifier format string. */
+#define PCI_FMT_NVAL 4
+
+/** Nb. of values in PCI resource format. */
+#define PCI_RESOURCE_FMT_NVAL 3
+
+/** IO resource type: memory address space */
+#define IORESOURCE_MEM 0x00000200
+
+/**
+ * A structure describing a PCI resource.
+ */
+struct rte_pci_resource {
+ uint64_t phys_addr; /**< Physical address, 0 if no resource. */
+ uint64_t len; /**< Length of the resource. */
+ void *addr; /**< Virtual address, NULL when not mapped. */
+};
+
+/** Maximum number of PCI resources. */
+#define PCI_MAX_RESOURCE 6
+
+/**
+ * A structure describing an ID for a PCI driver. Each driver provides a
+ * table of these IDs for each device that it supports.
+ */
+struct rte_pci_id {
+ uint16_t vendor_id; /**< Vendor ID or PCI_ANY_ID. */
+ uint16_t device_id; /**< Device ID or PCI_ANY_ID. */
+ uint16_t subsystem_vendor_id; /**< Subsystem vendor ID or PCI_ANY_ID. */
+ uint16_t subsystem_device_id; /**< Subsystem device ID or PCI_ANY_ID. */
+};
+
+/**
+ * A structure describing the location of a PCI device.
+ */
+struct rte_pci_addr {
+ uint16_t domain; /**< Device domain */
+ uint8_t bus; /**< Device bus */
+ uint8_t devid; /**< Device ID */
+ uint8_t function; /**< Device function. */
+};
+
+struct rte_devargs;
+
+enum rte_kernel_driver {
+ RTE_KDRV_UNKNOWN = 0,
+ RTE_KDRV_IGB_UIO,
+ RTE_KDRV_VFIO,
+ RTE_KDRV_UIO_GENERIC,
+ RTE_KDRV_NIC_UIO,
+ RTE_KDRV_NONE,
+};
+
+/**
+ * A structure describing a PCI device.
+ */
+struct rte_pci_device {
+ TAILQ_ENTRY(rte_pci_device) next; /**< Next probed PCI device. */
+ struct rte_pci_addr addr; /**< PCI location. */
+ struct rte_pci_id id; /**< PCI ID. */
+ struct rte_pci_resource mem_resource[PCI_MAX_RESOURCE]; /**< PCI Memory Resource */
+ struct rte_intr_handle intr_handle; /**< Interrupt handle */
+ struct rte_pci_driver *driver; /**< Associated driver */
+ uint16_t max_vfs; /**< sriov enable if not zero */
+ int numa_node; /**< NUMA node connection */
+ struct rte_devargs *devargs; /**< Device user arguments */
+ enum rte_kernel_driver kdrv; /**< Kernel driver passthrough */
+};
+
+/** Any PCI device identifier (vendor, device, ...) */
+#define PCI_ANY_ID (0xffff)
+
+#ifdef __cplusplus
+/** C++ macro used to help building up tables of device IDs */
+#define RTE_PCI_DEVICE(vend, dev) \
+ (vend), \
+ (dev), \
+ PCI_ANY_ID, \
+ PCI_ANY_ID
+#else
+/** Macro used to help building up tables of device IDs */
+#define RTE_PCI_DEVICE(vend, dev) \
+ .vendor_id = (vend), \
+ .device_id = (dev), \
+ .subsystem_vendor_id = PCI_ANY_ID, \
+ .subsystem_device_id = PCI_ANY_ID
+#endif
+
+struct rte_pci_driver;
+
+/**
+ * Initialisation function for the driver called during PCI probing.
+ */
+typedef int (pci_devinit_t)(struct rte_pci_driver *, struct rte_pci_device *);
+
+/**
+ * Uninitialisation function for the driver called during hotplugging.
+ */
+typedef int (pci_devuninit_t)(struct rte_pci_device *);
+
+/**
+ * A structure describing a PCI driver.
+ */
+struct rte_pci_driver {
+ TAILQ_ENTRY(rte_pci_driver) next; /**< Next in list. */
+ const char *name; /**< Driver name. */
+ pci_devinit_t *devinit; /**< Device init. function. */
+ pci_devuninit_t *devuninit; /**< Device uninit function. */
+ const struct rte_pci_id *id_table; /**< ID table, NULL terminated. */
+ uint32_t drv_flags; /**< Flags contolling handling of device. */
+};
+
+/** Device needs PCI BAR mapping (done with either IGB_UIO or VFIO) */
+#define RTE_PCI_DRV_NEED_MAPPING 0x0001
+/** Device driver must be registered several times until failure - deprecated */
+#pragma GCC poison RTE_PCI_DRV_MULTIPLE
+/** Device needs to be unbound even if no module is provided */
+#define RTE_PCI_DRV_FORCE_UNBIND 0x0004
+/** Device driver supports link state interrupt */
+#define RTE_PCI_DRV_INTR_LSC 0x0008
+/** Device driver supports detaching capability */
+#define RTE_PCI_DRV_DETACHABLE 0x0010
+
+/**
+ * A structure describing a PCI mapping.
+ */
+struct pci_map {
+ void *addr;
+ char *path;
+ uint64_t offset;
+ uint64_t size;
+ uint64_t phaddr;
+};
+
+/**
+ * A structure describing a mapped PCI resource.
+ * For multi-process we need to reproduce all PCI mappings in secondary
+ * processes, so save them in a tailq.
+ */
+struct mapped_pci_resource {
+ TAILQ_ENTRY(mapped_pci_resource) next;
+
+ struct rte_pci_addr pci_addr;
+ char path[PATH_MAX];
+ int nb_maps;
+ struct pci_map maps[PCI_MAX_RESOURCE];
+};
+
+/** mapped pci device list */
+TAILQ_HEAD(mapped_pci_res_list, mapped_pci_resource);
+
+/**< Internal use only - Macro used by pci addr parsing functions **/
+#define GET_PCIADDR_FIELD(in, fd, lim, dlm) \
+do { \
+ unsigned long val; \
+ char *end; \
+ errno = 0; \
+ val = strtoul((in), &end, 16); \
+ if (errno != 0 || end[0] != (dlm) || val > (lim)) \
+ return -EINVAL; \
+ (fd) = (typeof (fd))val; \
+ (in) = end + 1; \
+} while(0)
+
+/**
+ * Utility function to produce a PCI Bus-Device-Function value
+ * given a string representation. Assumes that the BDF is provided without
+ * a domain prefix (i.e. domain returned is always 0)
+ *
+ * @param input
+ * The input string to be parsed. Should have the format XX:XX.X
+ * @param dev_addr
+ * The PCI Bus-Device-Function address to be returned. Domain will always be
+ * returned as 0
+ * @return
+ * 0 on success, negative on error.
+ */
+static inline int
+eal_parse_pci_BDF(const char *input, struct rte_pci_addr *dev_addr)
+{
+ dev_addr->domain = 0;
+ GET_PCIADDR_FIELD(input, dev_addr->bus, UINT8_MAX, ':');
+ GET_PCIADDR_FIELD(input, dev_addr->devid, UINT8_MAX, '.');
+ GET_PCIADDR_FIELD(input, dev_addr->function, UINT8_MAX, 0);
+ return 0;
+}
+
+/**
+ * Utility function to produce a PCI Bus-Device-Function value
+ * given a string representation. Assumes that the BDF is provided including
+ * a domain prefix.
+ *
+ * @param input
+ * The input string to be parsed. Should have the format XXXX:XX:XX.X
+ * @param dev_addr
+ * The PCI Bus-Device-Function address to be returned
+ * @return
+ * 0 on success, negative on error.
+ */
+static inline int
+eal_parse_pci_DomBDF(const char *input, struct rte_pci_addr *dev_addr)
+{
+ GET_PCIADDR_FIELD(input, dev_addr->domain, UINT16_MAX, ':');
+ GET_PCIADDR_FIELD(input, dev_addr->bus, UINT8_MAX, ':');
+ GET_PCIADDR_FIELD(input, dev_addr->devid, UINT8_MAX, '.');
+ GET_PCIADDR_FIELD(input, dev_addr->function, UINT8_MAX, 0);
+ return 0;
+}
+#undef GET_PCIADDR_FIELD
+
+/* Compare two PCI device addresses. */
+/**
+ * Utility function to compare two PCI device addresses.
+ *
+ * @param addr
+ * The PCI Bus-Device-Function address to compare
+ * @param addr2
+ * The PCI Bus-Device-Function address to compare
+ * @return
+ * 0 on equal PCI address.
+ * Positive on addr is greater than addr2.
+ * Negative on addr is less than addr2, or error.
+ */
+static inline int
+rte_eal_compare_pci_addr(const struct rte_pci_addr *addr,
+ const struct rte_pci_addr *addr2)
+{
+ uint64_t dev_addr, dev_addr2;
+
+ if ((addr == NULL) || (addr2 == NULL))
+ return -1;
+
+ dev_addr = (addr->domain << 24) | (addr->bus << 16) |
+ (addr->devid << 8) | addr->function;
+ dev_addr2 = (addr2->domain << 24) | (addr2->bus << 16) |
+ (addr2->devid << 8) | addr2->function;
+
+ if (dev_addr > dev_addr2)
+ return 1;
+ else if (dev_addr < dev_addr2)
+ return -1;
+ else
+ return 0;
+}
+
+/**
+ * Scan the content of the PCI bus, and the devices in the devices
+ * list
+ *
+ * @return
+ * 0 on success, negative on error
+ */
+int rte_eal_pci_scan(void);
+
+/**
+ * Probe the PCI bus for registered drivers.
+ *
+ * Scan the content of the PCI bus, and call the probe() function for
+ * all registered drivers that have a matching entry in its id_table
+ * for discovered devices.
+ *
+ * @return
+ * - 0 on success.
+ * - Negative on error.
+ */
+int rte_eal_pci_probe(void);
+
+/**
+ * Map the PCI device resources in user space virtual memory address
+ *
+ * Note that driver should not call this function when flag
+ * RTE_PCI_DRV_NEED_MAPPING is set, as EAL will do that for
+ * you when it's on.
+ *
+ * @param dev
+ * A pointer to a rte_pci_device structure describing the device
+ * to use
+ *
+ * @return
+ * 0 on success, negative on error and positive if no driver
+ * is found for the device.
+ */
+int rte_eal_pci_map_device(struct rte_pci_device *dev);
+
+/**
+ * Unmap this device
+ *
+ * @param dev
+ * A pointer to a rte_pci_device structure describing the device
+ * to use
+ */
+void rte_eal_pci_unmap_device(struct rte_pci_device *dev);
+
+/**
+ * @internal
+ * Map a particular resource from a file.
+ *
+ * @param requested_addr
+ * The starting address for the new mapping range.
+ * @param fd
+ * The file descriptor.
+ * @param offset
+ * The offset for the mapping range.
+ * @param size
+ * The size for the mapping range.
+ * @param additional_flags
+ * The additional flags for the mapping range.
+ * @return
+ * - On success, the function returns a pointer to the mapped area.
+ * - On error, the value MAP_FAILED is returned.
+ */
+void *pci_map_resource(void *requested_addr, int fd, off_t offset,
+ size_t size, int additional_flags);
+
+/**
+ * @internal
+ * Unmap a particular resource.
+ *
+ * @param requested_addr
+ * The address for the unmapping range.
+ * @param size
+ * The size for the unmapping range.
+ */
+void pci_unmap_resource(void *requested_addr, size_t size);
+
+/**
+ * Probe the single PCI device.
+ *
+ * Scan the content of the PCI bus, and find the pci device specified by pci
+ * address, then call the probe() function for registered driver that has a
+ * matching entry in its id_table for discovered device.
+ *
+ * @param addr
+ * The PCI Bus-Device-Function address to probe.
+ * @return
+ * - 0 on success.
+ * - Negative on error.
+ */
+int rte_eal_pci_probe_one(const struct rte_pci_addr *addr);
+
+/**
+ * Close the single PCI device.
+ *
+ * Scan the content of the PCI bus, and find the pci device specified by pci
+ * address, then call the devuninit() function for registered driver that has a
+ * matching entry in its id_table for discovered device.
+ *
+ * @param addr
+ * The PCI Bus-Device-Function address to close.
+ * @return
+ * - 0 on success.
+ * - Negative on error.
+ */
+int rte_eal_pci_detach(const struct rte_pci_addr *addr);
+
+/**
+ * Dump the content of the PCI bus.
+ *
+ * @param f
+ * A pointer to a file for output
+ */
+void rte_eal_pci_dump(FILE *f);
+
+/**
+ * Register a PCI driver.
+ *
+ * @param driver
+ * A pointer to a rte_pci_driver structure describing the driver
+ * to be registered.
+ */
+void rte_eal_pci_register(struct rte_pci_driver *driver);
+
+/**
+ * Unregister a PCI driver.
+ *
+ * @param driver
+ * A pointer to a rte_pci_driver structure describing the driver
+ * to be unregistered.
+ */
+void rte_eal_pci_unregister(struct rte_pci_driver *driver);
+
+/**
+ * Read PCI config space.
+ *
+ * @param device
+ * A pointer to a rte_pci_device structure describing the device
+ * to use
+ * @param buf
+ * A data buffer where the bytes should be read into
+ * @param len
+ * The length of the data buffer.
+ * @param offset
+ * The offset into PCI config space
+ */
+int rte_eal_pci_read_config(const struct rte_pci_device *device,
+ void *buf, size_t len, off_t offset);
+
+/**
+ * Write PCI config space.
+ *
+ * @param device
+ * A pointer to a rte_pci_device structure describing the device
+ * to use
+ * @param buf
+ * A data buffer containing the bytes should be written
+ * @param len
+ * The length of the data buffer.
+ * @param offset
+ * The offset into PCI config space
+ */
+int rte_eal_pci_write_config(const struct rte_pci_device *device,
+ const void *buf, size_t len, off_t offset);
+
+/**
+ * A structure used to access io resources for a pci device.
+ * rte_pci_ioport is arch, os, driver specific, and should not be used outside
+ * of pci ioport api.
+ */
+struct rte_pci_ioport {
+ struct rte_pci_device *dev;
+ uint64_t base;
+};
+
+/**
+ * Initialises a rte_pci_ioport object for a pci device io resource.
+ * This object is then used to gain access to those io resources (see below).
+ *
+ * @param dev
+ * A pointer to a rte_pci_device structure describing the device.
+ * to use
+ * @param bar
+ * Index of the io pci resource we want to access.
+ * @param p
+ * The rte_pci_ioport object to be initialized.
+ * @return
+ * 0 on success, negative on error.
+ */
+int rte_eal_pci_ioport_map(struct rte_pci_device *dev, int bar,
+ struct rte_pci_ioport *p);
+
+/**
+ * Release any resources used in a rte_pci_ioport object.
+ *
+ * @param p
+ * The rte_pci_ioport object to be uninitialized.
+ */
+int rte_eal_pci_ioport_unmap(struct rte_pci_ioport *p);
+
+/**
+ * Read from a io pci resource.
+ *
+ * @param p
+ * The rte_pci_ioport object from which we want to read.
+ * @param data
+ * A data buffer where the bytes should be read into
+ * @param len
+ * The length of the data buffer.
+ * @param offset
+ * The offset into the pci io resource.
+ */
+void rte_eal_pci_ioport_read(struct rte_pci_ioport *p,
+ void *data, size_t len, off_t offset);
+
+/**
+ * Write to a io pci resource.
+ *
+ * @param p
+ * The rte_pci_ioport object to which we want to write.
+ * @param data
+ * A data buffer where the bytes should be read into
+ * @param len
+ * The length of the data buffer.
+ * @param offset
+ * The offset into the pci io resource.
+ */
+void rte_eal_pci_ioport_write(struct rte_pci_ioport *p,
+ const void *data, size_t len, off_t offset);
+
+#ifdef RTE_PCI_CONFIG
+#include <rte_common.h>
+/**
+ * Set special config space registers for performance purpose.
+ * It is deprecated, as all configurations have been moved into
+ * each PMDs respectively.
+ *
+ * @param dev
+ * A pointer to a rte_pci_device structure describing the device
+ * to use
+ */
+void pci_config_space_set(struct rte_pci_device *dev) __rte_deprecated;
+#endif /* RTE_PCI_CONFIG */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_PCI_H_ */
diff --git a/lib/librte_eal/common/include/rte_pci_dev_feature_defs.h b/lib/librte_eal/common/include/rte_pci_dev_feature_defs.h
new file mode 100644
index 00000000..08222510
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_pci_dev_feature_defs.h
@@ -0,0 +1,70 @@
+/*-
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ * The full GNU General Public License is included in this distribution
+ * in the file called LICENSE.GPL.
+ *
+ * Contact Information:
+ * Intel Corporation
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_PCI_DEV_DEFS_H_
+#define _RTE_PCI_DEV_DEFS_H_
+
+/* interrupt mode */
+enum rte_intr_mode {
+ RTE_INTR_MODE_NONE = 0,
+ RTE_INTR_MODE_LEGACY,
+ RTE_INTR_MODE_MSI,
+ RTE_INTR_MODE_MSIX
+};
+
+#endif /* _RTE_PCI_DEV_DEFS_H_ */
diff --git a/lib/librte_eal/common/include/rte_pci_dev_features.h b/lib/librte_eal/common/include/rte_pci_dev_features.h
new file mode 100644
index 00000000..67b986a6
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_pci_dev_features.h
@@ -0,0 +1,69 @@
+/*-
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ * The full GNU General Public License is included in this distribution
+ * in the file called LICENSE.GPL.
+ *
+ * Contact Information:
+ * Intel Corporation
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_PCI_DEV_FEATURES_H
+#define _RTE_PCI_DEV_FEATURES_H
+
+#include <rte_pci_dev_feature_defs.h>
+
+#define RTE_INTR_MODE_NONE_NAME "none"
+#define RTE_INTR_MODE_LEGACY_NAME "legacy"
+#define RTE_INTR_MODE_MSI_NAME "msi"
+#define RTE_INTR_MODE_MSIX_NAME "msix"
+
+#endif
diff --git a/lib/librte_eal/common/include/rte_pci_dev_ids.h b/lib/librte_eal/common/include/rte_pci_dev_ids.h
new file mode 100644
index 00000000..cf7b5487
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_pci_dev_ids.h
@@ -0,0 +1,704 @@
+/*-
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ * The full GNU General Public License is included in this distribution
+ * in the file called LICENSE.GPL.
+ *
+ * Contact Information:
+ * Intel Corporation
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/**
+ * @file
+ *
+ * This file contains a list of the PCI device IDs recognised by DPDK, which
+ * can be used to fill out an array of structures describing the devices.
+ *
+ * Currently four families of devices are recognised: those supported by the
+ * IGB driver, by EM driver, those supported by the IXGBE driver, and by virtio
+ * driver which is a para virtualization driver running in guest virtual machine.
+ * The inclusion of these in an array built using this file depends on the
+ * definition of
+ * RTE_PCI_DEV_ID_DECL_EM
+ * RTE_PCI_DEV_ID_DECL_IGB
+ * RTE_PCI_DEV_ID_DECL_IGBVF
+ * RTE_PCI_DEV_ID_DECL_IXGBE
+ * RTE_PCI_DEV_ID_DECL_IXGBEVF
+ * RTE_PCI_DEV_ID_DECL_I40E
+ * RTE_PCI_DEV_ID_DECL_I40EVF
+ * RTE_PCI_DEV_ID_DECL_VIRTIO
+ * at the time when this file is included.
+ *
+ * In order to populate an array, the user of this file must define this macro:
+ * RTE_PCI_DEV_ID_DECL_IXGBE(vendorID, deviceID). For example:
+ *
+ * @code
+ * struct device {
+ * int vend;
+ * int dev;
+ * };
+ *
+ * struct device devices[] = {
+ * #define RTE_PCI_DEV_ID_DECL_IXGBE(vendorID, deviceID) {vend, dev},
+ * #include <rte_pci_dev_ids.h>
+ * };
+ * @endcode
+ *
+ * Note that this file can be included multiple times within the same file.
+ */
+
+#ifndef RTE_PCI_DEV_ID_DECL_EM
+#define RTE_PCI_DEV_ID_DECL_EM(vend, dev)
+#endif
+
+#ifndef RTE_PCI_DEV_ID_DECL_IGB
+#define RTE_PCI_DEV_ID_DECL_IGB(vend, dev)
+#endif
+
+#ifndef RTE_PCI_DEV_ID_DECL_IGBVF
+#define RTE_PCI_DEV_ID_DECL_IGBVF(vend, dev)
+#endif
+
+#ifndef RTE_PCI_DEV_ID_DECL_IXGBE
+#define RTE_PCI_DEV_ID_DECL_IXGBE(vend, dev)
+#endif
+
+#ifndef RTE_PCI_DEV_ID_DECL_IXGBEVF
+#define RTE_PCI_DEV_ID_DECL_IXGBEVF(vend, dev)
+#endif
+
+#ifndef RTE_PCI_DEV_ID_DECL_I40E
+#define RTE_PCI_DEV_ID_DECL_I40E(vend, dev)
+#endif
+
+#ifndef RTE_PCI_DEV_ID_DECL_I40EVF
+#define RTE_PCI_DEV_ID_DECL_I40EVF(vend, dev)
+#endif
+
+#ifndef RTE_PCI_DEV_ID_DECL_VIRTIO
+#define RTE_PCI_DEV_ID_DECL_VIRTIO(vend, dev)
+#endif
+
+#ifndef RTE_PCI_DEV_ID_DECL_VMXNET3
+#define RTE_PCI_DEV_ID_DECL_VMXNET3(vend, dev)
+#endif
+
+#ifndef RTE_PCI_DEV_ID_DECL_FM10K
+#define RTE_PCI_DEV_ID_DECL_FM10K(vend, dev)
+#endif
+
+#ifndef RTE_PCI_DEV_ID_DECL_FM10KVF
+#define RTE_PCI_DEV_ID_DECL_FM10KVF(vend, dev)
+#endif
+
+#ifndef RTE_PCI_DEV_ID_DECL_ENIC
+#define RTE_PCI_DEV_ID_DECL_ENIC(vend, dev)
+#endif
+
+#ifndef RTE_PCI_DEV_ID_DECL_BNX2X
+#define RTE_PCI_DEV_ID_DECL_BNX2X(vend, dev)
+#endif
+
+#ifndef RTE_PCI_DEV_ID_DECL_BNX2XVF
+#define RTE_PCI_DEV_ID_DECL_BNX2XVF(vend, dev)
+#endif
+
+#ifndef PCI_VENDOR_ID_INTEL
+/** Vendor ID used by Intel devices */
+#define PCI_VENDOR_ID_INTEL 0x8086
+#endif
+
+#ifndef PCI_VENDOR_ID_QUMRANET
+/** Vendor ID used by virtio devices */
+#define PCI_VENDOR_ID_QUMRANET 0x1AF4
+#endif
+
+#ifndef PCI_VENDOR_ID_VMWARE
+/** Vendor ID used by VMware devices */
+#define PCI_VENDOR_ID_VMWARE 0x15AD
+#endif
+
+#ifndef PCI_VENDOR_ID_CISCO
+/** Vendor ID used by Cisco VIC devices */
+#define PCI_VENDOR_ID_CISCO 0x1137
+#endif
+
+#ifndef PCI_VENDOR_ID_BROADCOM
+/** Vendor ID used by Broadcom devices */
+#define PCI_VENDOR_ID_BROADCOM 0x14E4
+#endif
+
+/******************** Physical EM devices from e1000_hw.h ********************/
+
+#define E1000_DEV_ID_82542 0x1000
+#define E1000_DEV_ID_82543GC_FIBER 0x1001
+#define E1000_DEV_ID_82543GC_COPPER 0x1004
+#define E1000_DEV_ID_82544EI_COPPER 0x1008
+#define E1000_DEV_ID_82544EI_FIBER 0x1009
+#define E1000_DEV_ID_82544GC_COPPER 0x100C
+#define E1000_DEV_ID_82544GC_LOM 0x100D
+#define E1000_DEV_ID_82540EM 0x100E
+#define E1000_DEV_ID_82540EM_LOM 0x1015
+#define E1000_DEV_ID_82540EP_LOM 0x1016
+#define E1000_DEV_ID_82540EP 0x1017
+#define E1000_DEV_ID_82540EP_LP 0x101E
+#define E1000_DEV_ID_82545EM_COPPER 0x100F
+#define E1000_DEV_ID_82545EM_FIBER 0x1011
+#define E1000_DEV_ID_82545GM_COPPER 0x1026
+#define E1000_DEV_ID_82545GM_FIBER 0x1027
+#define E1000_DEV_ID_82545GM_SERDES 0x1028
+#define E1000_DEV_ID_82546EB_COPPER 0x1010
+#define E1000_DEV_ID_82546EB_FIBER 0x1012
+#define E1000_DEV_ID_82546EB_QUAD_COPPER 0x101D
+#define E1000_DEV_ID_82546GB_COPPER 0x1079
+#define E1000_DEV_ID_82546GB_FIBER 0x107A
+#define E1000_DEV_ID_82546GB_SERDES 0x107B
+#define E1000_DEV_ID_82546GB_PCIE 0x108A
+#define E1000_DEV_ID_82546GB_QUAD_COPPER 0x1099
+#define E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3 0x10B5
+#define E1000_DEV_ID_82541EI 0x1013
+#define E1000_DEV_ID_82541EI_MOBILE 0x1018
+#define E1000_DEV_ID_82541ER_LOM 0x1014
+#define E1000_DEV_ID_82541ER 0x1078
+#define E1000_DEV_ID_82541GI 0x1076
+#define E1000_DEV_ID_82541GI_LF 0x107C
+#define E1000_DEV_ID_82541GI_MOBILE 0x1077
+#define E1000_DEV_ID_82547EI 0x1019
+#define E1000_DEV_ID_82547EI_MOBILE 0x101A
+#define E1000_DEV_ID_82547GI 0x1075
+#define E1000_DEV_ID_82571EB_COPPER 0x105E
+#define E1000_DEV_ID_82571EB_FIBER 0x105F
+#define E1000_DEV_ID_82571EB_SERDES 0x1060
+#define E1000_DEV_ID_82571EB_SERDES_DUAL 0x10D9
+#define E1000_DEV_ID_82571EB_SERDES_QUAD 0x10DA
+#define E1000_DEV_ID_82571EB_QUAD_COPPER 0x10A4
+#define E1000_DEV_ID_82571PT_QUAD_COPPER 0x10D5
+#define E1000_DEV_ID_82571EB_QUAD_FIBER 0x10A5
+#define E1000_DEV_ID_82571EB_QUAD_COPPER_LP 0x10BC
+#define E1000_DEV_ID_82572EI_COPPER 0x107D
+#define E1000_DEV_ID_82572EI_FIBER 0x107E
+#define E1000_DEV_ID_82572EI_SERDES 0x107F
+#define E1000_DEV_ID_82572EI 0x10B9
+#define E1000_DEV_ID_82573E 0x108B
+#define E1000_DEV_ID_82573E_IAMT 0x108C
+#define E1000_DEV_ID_82573L 0x109A
+#define E1000_DEV_ID_82574L 0x10D3
+#define E1000_DEV_ID_82574LA 0x10F6
+#define E1000_DEV_ID_82583V 0x150C
+#define E1000_DEV_ID_80003ES2LAN_COPPER_DPT 0x1096
+#define E1000_DEV_ID_80003ES2LAN_SERDES_DPT 0x1098
+#define E1000_DEV_ID_80003ES2LAN_COPPER_SPT 0x10BA
+#define E1000_DEV_ID_80003ES2LAN_SERDES_SPT 0x10BB
+#define E1000_DEV_ID_ICH8_82567V_3 0x1501
+#define E1000_DEV_ID_ICH8_IGP_M_AMT 0x1049
+#define E1000_DEV_ID_ICH8_IGP_AMT 0x104A
+#define E1000_DEV_ID_ICH8_IGP_C 0x104B
+#define E1000_DEV_ID_ICH8_IFE 0x104C
+#define E1000_DEV_ID_ICH8_IFE_GT 0x10C4
+#define E1000_DEV_ID_ICH8_IFE_G 0x10C5
+#define E1000_DEV_ID_ICH8_IGP_M 0x104D
+#define E1000_DEV_ID_ICH9_IGP_M 0x10BF
+#define E1000_DEV_ID_ICH9_IGP_M_AMT 0x10F5
+#define E1000_DEV_ID_ICH9_IGP_M_V 0x10CB
+#define E1000_DEV_ID_ICH9_IGP_AMT 0x10BD
+#define E1000_DEV_ID_ICH9_BM 0x10E5
+#define E1000_DEV_ID_ICH9_IGP_C 0x294C
+#define E1000_DEV_ID_ICH9_IFE 0x10C0
+#define E1000_DEV_ID_ICH9_IFE_GT 0x10C3
+#define E1000_DEV_ID_ICH9_IFE_G 0x10C2
+#define E1000_DEV_ID_ICH10_R_BM_LM 0x10CC
+#define E1000_DEV_ID_ICH10_R_BM_LF 0x10CD
+#define E1000_DEV_ID_ICH10_R_BM_V 0x10CE
+#define E1000_DEV_ID_ICH10_D_BM_LM 0x10DE
+#define E1000_DEV_ID_ICH10_D_BM_LF 0x10DF
+#define E1000_DEV_ID_ICH10_D_BM_V 0x1525
+
+#define E1000_DEV_ID_PCH_M_HV_LM 0x10EA
+#define E1000_DEV_ID_PCH_M_HV_LC 0x10EB
+#define E1000_DEV_ID_PCH_D_HV_DM 0x10EF
+#define E1000_DEV_ID_PCH_D_HV_DC 0x10F0
+#define E1000_DEV_ID_PCH2_LV_LM 0x1502
+#define E1000_DEV_ID_PCH2_LV_V 0x1503
+#define E1000_DEV_ID_PCH_LPT_I217_LM 0x153A
+#define E1000_DEV_ID_PCH_LPT_I217_V 0x153B
+#define E1000_DEV_ID_PCH_LPTLP_I218_LM 0x155A
+#define E1000_DEV_ID_PCH_LPTLP_I218_V 0x1559
+#define E1000_DEV_ID_PCH_I218_LM2 0x15A0
+#define E1000_DEV_ID_PCH_I218_V2 0x15A1
+#define E1000_DEV_ID_PCH_I218_LM3 0x15A2
+#define E1000_DEV_ID_PCH_I218_V3 0x15A3
+
+
+/*
+ * Tested (supported) on VM emulated HW.
+ */
+
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82540EM)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82545EM_COPPER)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82545EM_FIBER)
+
+/*
+ * Tested (supported) on real HW.
+ */
+
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82546EB_COPPER)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82546EB_FIBER)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82546EB_QUAD_COPPER)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82571EB_COPPER)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82571EB_FIBER)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82571EB_SERDES)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82571EB_SERDES_DUAL)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82571EB_SERDES_QUAD)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82571EB_QUAD_COPPER)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82571PT_QUAD_COPPER)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82571EB_QUAD_FIBER)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82571EB_QUAD_COPPER_LP)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82572EI_COPPER)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82572EI_FIBER)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82572EI_SERDES)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82572EI)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82573L)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82574L)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82574LA)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82583V)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_PCH_LPT_I217_LM)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_PCH_LPT_I217_V)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_PCH_LPTLP_I218_LM)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_PCH_LPTLP_I218_V)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_PCH_I218_LM2)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_PCH_I218_V2)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_PCH_I218_LM3)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_PCH_I218_V3)
+
+
+/******************** Physical IGB devices from e1000_hw.h ********************/
+
+#define E1000_DEV_ID_82576 0x10C9
+#define E1000_DEV_ID_82576_FIBER 0x10E6
+#define E1000_DEV_ID_82576_SERDES 0x10E7
+#define E1000_DEV_ID_82576_QUAD_COPPER 0x10E8
+#define E1000_DEV_ID_82576_QUAD_COPPER_ET2 0x1526
+#define E1000_DEV_ID_82576_NS 0x150A
+#define E1000_DEV_ID_82576_NS_SERDES 0x1518
+#define E1000_DEV_ID_82576_SERDES_QUAD 0x150D
+#define E1000_DEV_ID_82575EB_COPPER 0x10A7
+#define E1000_DEV_ID_82575EB_FIBER_SERDES 0x10A9
+#define E1000_DEV_ID_82575GB_QUAD_COPPER 0x10D6
+#define E1000_DEV_ID_82580_COPPER 0x150E
+#define E1000_DEV_ID_82580_FIBER 0x150F
+#define E1000_DEV_ID_82580_SERDES 0x1510
+#define E1000_DEV_ID_82580_SGMII 0x1511
+#define E1000_DEV_ID_82580_COPPER_DUAL 0x1516
+#define E1000_DEV_ID_82580_QUAD_FIBER 0x1527
+#define E1000_DEV_ID_I350_COPPER 0x1521
+#define E1000_DEV_ID_I350_FIBER 0x1522
+#define E1000_DEV_ID_I350_SERDES 0x1523
+#define E1000_DEV_ID_I350_SGMII 0x1524
+#define E1000_DEV_ID_I350_DA4 0x1546
+#define E1000_DEV_ID_I210_COPPER 0x1533
+#define E1000_DEV_ID_I210_COPPER_OEM1 0x1534
+#define E1000_DEV_ID_I210_COPPER_IT 0x1535
+#define E1000_DEV_ID_I210_FIBER 0x1536
+#define E1000_DEV_ID_I210_SERDES 0x1537
+#define E1000_DEV_ID_I210_SGMII 0x1538
+#define E1000_DEV_ID_I210_COPPER_FLASHLESS 0x157B
+#define E1000_DEV_ID_I210_SERDES_FLASHLESS 0x157C
+#define E1000_DEV_ID_I211_COPPER 0x1539
+#define E1000_DEV_ID_I354_BACKPLANE_1GBPS 0x1F40
+#define E1000_DEV_ID_I354_SGMII 0x1F41
+#define E1000_DEV_ID_I354_BACKPLANE_2_5GBPS 0x1F45
+#define E1000_DEV_ID_DH89XXCC_SGMII 0x0438
+#define E1000_DEV_ID_DH89XXCC_SERDES 0x043A
+#define E1000_DEV_ID_DH89XXCC_BACKPLANE 0x043C
+#define E1000_DEV_ID_DH89XXCC_SFP 0x0440
+
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_FIBER)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_SERDES)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_QUAD_COPPER)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_NS)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_NS_SERDES)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_SERDES_QUAD)
+
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82575EB_COPPER)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER)
+
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_COPPER)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_FIBER)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_SERDES)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_SGMII)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_COPPER_DUAL)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_QUAD_FIBER)
+
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_COPPER)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_FIBER)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_SERDES)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_SGMII)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_DA4)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_COPPER)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_COPPER_OEM1)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_COPPER_IT)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_FIBER)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_SERDES)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_SGMII)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I211_COPPER)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I354_BACKPLANE_1GBPS)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I354_SGMII)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_DH89XXCC_SGMII)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_DH89XXCC_SERDES)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_DH89XXCC_SFP)
+
+/****************** Physical IXGBE devices from ixgbe_type.h ******************/
+
+#define IXGBE_DEV_ID_82598 0x10B6
+#define IXGBE_DEV_ID_82598_BX 0x1508
+#define IXGBE_DEV_ID_82598AF_DUAL_PORT 0x10C6
+#define IXGBE_DEV_ID_82598AF_SINGLE_PORT 0x10C7
+#define IXGBE_DEV_ID_82598AT 0x10C8
+#define IXGBE_DEV_ID_82598AT2 0x150B
+#define IXGBE_DEV_ID_82598EB_SFP_LOM 0x10DB
+#define IXGBE_DEV_ID_82598EB_CX4 0x10DD
+#define IXGBE_DEV_ID_82598_CX4_DUAL_PORT 0x10EC
+#define IXGBE_DEV_ID_82598_DA_DUAL_PORT 0x10F1
+#define IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM 0x10E1
+#define IXGBE_DEV_ID_82598EB_XF_LR 0x10F4
+#define IXGBE_DEV_ID_82599_KX4 0x10F7
+#define IXGBE_DEV_ID_82599_KX4_MEZZ 0x1514
+#define IXGBE_DEV_ID_82599_KR 0x1517
+#define IXGBE_DEV_ID_82599_COMBO_BACKPLANE 0x10F8
+#define IXGBE_SUBDEV_ID_82599_KX4_KR_MEZZ 0x000C
+#define IXGBE_DEV_ID_82599_CX4 0x10F9
+#define IXGBE_DEV_ID_82599_SFP 0x10FB
+#define IXGBE_SUBDEV_ID_82599_SFP 0x11A9
+#define IXGBE_SUBDEV_ID_82599_RNDC 0x1F72
+#define IXGBE_SUBDEV_ID_82599_560FLR 0x17D0
+#define IXGBE_SUBDEV_ID_82599_ECNA_DP 0x0470
+#define IXGBE_DEV_ID_82599_BACKPLANE_FCOE 0x152A
+#define IXGBE_DEV_ID_82599_SFP_FCOE 0x1529
+#define IXGBE_DEV_ID_82599_SFP_EM 0x1507
+#define IXGBE_DEV_ID_82599_SFP_SF2 0x154D
+#define IXGBE_DEV_ID_82599_SFP_SF_QP 0x154A
+#define IXGBE_DEV_ID_82599_QSFP_SF_QP 0x1558
+#define IXGBE_DEV_ID_82599EN_SFP 0x1557
+#define IXGBE_DEV_ID_82599_XAUI_LOM 0x10FC
+#define IXGBE_DEV_ID_82599_T3_LOM 0x151C
+#define IXGBE_DEV_ID_82599_LS 0x154F
+#define IXGBE_DEV_ID_X540T 0x1528
+#define IXGBE_DEV_ID_X540T1 0x1560
+#define IXGBE_DEV_ID_X550EM_X_SFP 0x15AC
+#define IXGBE_DEV_ID_X550EM_X_10G_T 0x15AD
+#define IXGBE_DEV_ID_X550EM_X_1G_T 0x15AE
+#define IXGBE_DEV_ID_X550T 0x1563
+#define IXGBE_DEV_ID_X550T1 0x15D1
+#define IXGBE_DEV_ID_X550EM_A_KR 0x15C2
+#define IXGBE_DEV_ID_X550EM_A_KR_L 0x15C3
+#define IXGBE_DEV_ID_X550EM_A_SFP_N 0x15C4
+#define IXGBE_DEV_ID_X550EM_A_1G_T 0x15C6
+#define IXGBE_DEV_ID_X550EM_A_1G_T_L 0x15C7
+#define IXGBE_DEV_ID_X550EM_A_10G_T 0x15C8
+#define IXGBE_DEV_ID_X550EM_A_QSFP 0x15CA
+#define IXGBE_DEV_ID_X550EM_A_QSFP_N 0x15CC
+#define IXGBE_DEV_ID_X550EM_A_SFP 0x15CE
+#define IXGBE_DEV_ID_X550EM_X_KX4 0x15AA
+#define IXGBE_DEV_ID_X550EM_X_KR 0x15AB
+
+#ifdef RTE_NIC_BYPASS
+#define IXGBE_DEV_ID_82599_BYPASS 0x155D
+#endif
+
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598_BX)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598AF_DUAL_PORT)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, \
+ IXGBE_DEV_ID_82598AF_SINGLE_PORT)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598AT)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598AT2)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598EB_SFP_LOM)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598EB_CX4)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598_CX4_DUAL_PORT)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598_DA_DUAL_PORT)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, \
+ IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598EB_XF_LR)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_KX4)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_KX4_MEZZ)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_KR)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, \
+ IXGBE_DEV_ID_82599_COMBO_BACKPLANE)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, \
+ IXGBE_SUBDEV_ID_82599_KX4_KR_MEZZ)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_CX4)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_SFP)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_SUBDEV_ID_82599_SFP)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_SUBDEV_ID_82599_RNDC)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_SUBDEV_ID_82599_560FLR)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_SUBDEV_ID_82599_ECNA_DP)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_BACKPLANE_FCOE)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_SFP_FCOE)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_SFP_EM)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_SFP_SF2)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_SFP_SF_QP)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_QSFP_SF_QP)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599EN_SFP)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_XAUI_LOM)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_T3_LOM)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_LS)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X540T)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X540T1)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_SFP)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_10G_T)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_1G_T)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550T)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550T1)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_KR)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_KR_L)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_SFP_N)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_1G_T)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_1G_T_L)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_10G_T)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_QSFP)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_QSFP_N)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_SFP)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_KX4)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_KR)
+
+#ifdef RTE_NIC_BYPASS
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_BYPASS)
+#endif
+
+/*************** Physical I40E devices from i40e_type.h *****************/
+
+#define I40E_DEV_ID_SFP_XL710 0x1572
+#define I40E_DEV_ID_QEMU 0x1574
+#define I40E_DEV_ID_KX_B 0x1580
+#define I40E_DEV_ID_KX_C 0x1581
+#define I40E_DEV_ID_QSFP_A 0x1583
+#define I40E_DEV_ID_QSFP_B 0x1584
+#define I40E_DEV_ID_QSFP_C 0x1585
+#define I40E_DEV_ID_10G_BASE_T 0x1586
+#define I40E_DEV_ID_20G_KR2 0x1587
+#define I40E_DEV_ID_20G_KR2_A 0x1588
+#define I40E_DEV_ID_10G_BASE_T4 0x1589
+#define I40E_DEV_ID_X722_A0 0x374C
+#define I40E_DEV_ID_KX_X722 0x37CE
+#define I40E_DEV_ID_QSFP_X722 0x37CF
+#define I40E_DEV_ID_SFP_X722 0x37D0
+#define I40E_DEV_ID_1G_BASE_T_X722 0x37D1
+#define I40E_DEV_ID_10G_BASE_T_X722 0x37D2
+
+RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_SFP_XL710)
+RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_QEMU)
+RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_KX_B)
+RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_KX_C)
+RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_QSFP_A)
+RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_QSFP_B)
+RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_QSFP_C)
+RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_10G_BASE_T)
+RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_20G_KR2)
+RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_20G_KR2_A)
+RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_10G_BASE_T4)
+RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_X722_A0)
+RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_KX_X722)
+RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_QSFP_X722)
+RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_SFP_X722)
+RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_1G_BASE_T_X722)
+RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_10G_BASE_T_X722)
+
+/*************** Physical FM10K devices from fm10k_type.h ***************/
+
+#define FM10K_DEV_ID_PF 0x15A4
+#define FM10K_DEV_ID_SDI_FM10420_QDA2 0x15D0
+
+RTE_PCI_DEV_ID_DECL_FM10K(PCI_VENDOR_ID_INTEL, FM10K_DEV_ID_PF)
+RTE_PCI_DEV_ID_DECL_FM10K(PCI_VENDOR_ID_INTEL, FM10K_DEV_ID_SDI_FM10420_QDA2)
+
+/****************** Virtual IGB devices from e1000_hw.h ******************/
+
+#define E1000_DEV_ID_82576_VF 0x10CA
+#define E1000_DEV_ID_82576_VF_HV 0x152D
+#define E1000_DEV_ID_I350_VF 0x1520
+#define E1000_DEV_ID_I350_VF_HV 0x152F
+
+RTE_PCI_DEV_ID_DECL_IGBVF(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_VF)
+RTE_PCI_DEV_ID_DECL_IGBVF(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_VF_HV)
+RTE_PCI_DEV_ID_DECL_IGBVF(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_VF)
+RTE_PCI_DEV_ID_DECL_IGBVF(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_VF_HV)
+
+/****************** Virtual IXGBE devices from ixgbe_type.h ******************/
+
+#define IXGBE_DEV_ID_82599_VF 0x10ED
+#define IXGBE_DEV_ID_82599_VF_HV 0x152E
+#define IXGBE_DEV_ID_X540_VF 0x1515
+#define IXGBE_DEV_ID_X540_VF_HV 0x1530
+#define IXGBE_DEV_ID_X550_VF_HV 0x1564
+#define IXGBE_DEV_ID_X550_VF 0x1565
+#define IXGBE_DEV_ID_X550EM_A_VF 0x15C5
+#define IXGBE_DEV_ID_X550EM_A_VF_HV 0x15B4
+#define IXGBE_DEV_ID_X550EM_X_VF 0x15A8
+#define IXGBE_DEV_ID_X550EM_X_VF_HV 0x15A9
+
+RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_VF)
+RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_VF_HV)
+RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X540_VF)
+RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X540_VF_HV)
+RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550_VF_HV)
+RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550_VF)
+RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_VF)
+RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_VF_HV)
+RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_VF)
+RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_VF_HV)
+
+/****************** Virtual I40E devices from i40e_type.h ********************/
+
+#define I40E_DEV_ID_VF 0x154C
+#define I40E_DEV_ID_VF_HV 0x1571
+#define I40E_DEV_ID_X722_A0_VF 0x374D
+#define I40E_DEV_ID_X722_VF 0x37CD
+#define I40E_DEV_ID_X722_VF_HV 0x37D9
+
+RTE_PCI_DEV_ID_DECL_I40EVF(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_VF)
+RTE_PCI_DEV_ID_DECL_I40EVF(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_VF_HV)
+RTE_PCI_DEV_ID_DECL_I40EVF(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_X722_A0_VF)
+RTE_PCI_DEV_ID_DECL_I40EVF(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_X722_VF)
+RTE_PCI_DEV_ID_DECL_I40EVF(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_X722_VF_HV)
+
+/****************** Virtio devices from virtio.h ******************/
+
+#define QUMRANET_DEV_ID_VIRTIO 0x1000
+
+RTE_PCI_DEV_ID_DECL_VIRTIO(PCI_VENDOR_ID_QUMRANET, QUMRANET_DEV_ID_VIRTIO)
+
+/****************** VMware VMXNET3 devices ******************/
+
+#define VMWARE_DEV_ID_VMXNET3 0x07B0
+
+RTE_PCI_DEV_ID_DECL_VMXNET3(PCI_VENDOR_ID_VMWARE, VMWARE_DEV_ID_VMXNET3)
+
+/*************** Virtual FM10K devices from fm10k_type.h ***************/
+
+#define FM10K_DEV_ID_VF 0x15A5
+
+RTE_PCI_DEV_ID_DECL_FM10KVF(PCI_VENDOR_ID_INTEL, FM10K_DEV_ID_VF)
+
+/****************** Cisco VIC devices ******************/
+
+#define PCI_DEVICE_ID_CISCO_VIC_ENET 0x0043 /* ethernet vnic */
+#define PCI_DEVICE_ID_CISCO_VIC_ENET_VF 0x0071 /* enet SRIOV VF */
+
+RTE_PCI_DEV_ID_DECL_ENIC(PCI_VENDOR_ID_CISCO, PCI_DEVICE_ID_CISCO_VIC_ENET)
+RTE_PCI_DEV_ID_DECL_ENIC(PCI_VENDOR_ID_CISCO, PCI_DEVICE_ID_CISCO_VIC_ENET_VF)
+
+/****************** QLogic devices ******************/
+
+/* Broadcom/QLogic BNX2X */
+#define BNX2X_DEV_ID_57710 0x164e
+#define BNX2X_DEV_ID_57711 0x164f
+#define BNX2X_DEV_ID_57711E 0x1650
+#define BNX2X_DEV_ID_57712 0x1662
+#define BNX2X_DEV_ID_57712_MF 0x1663
+#define BNX2X_DEV_ID_57712_VF 0x166f
+#define BNX2X_DEV_ID_57713 0x1651
+#define BNX2X_DEV_ID_57713E 0x1652
+#define BNX2X_DEV_ID_57800 0x168a
+#define BNX2X_DEV_ID_57800_MF 0x16a5
+#define BNX2X_DEV_ID_57800_VF 0x16a9
+#define BNX2X_DEV_ID_57810 0x168e
+#define BNX2X_DEV_ID_57810_MF 0x16ae
+#define BNX2X_DEV_ID_57810_VF 0x16af
+#define BNX2X_DEV_ID_57811 0x163d
+#define BNX2X_DEV_ID_57811_MF 0x163e
+#define BNX2X_DEV_ID_57811_VF 0x163f
+
+#define BNX2X_DEV_ID_57840_OBS 0x168d
+#define BNX2X_DEV_ID_57840_OBS_MF 0x16ab
+#define BNX2X_DEV_ID_57840_4_10 0x16a1
+#define BNX2X_DEV_ID_57840_2_20 0x16a2
+#define BNX2X_DEV_ID_57840_MF 0x16a4
+#define BNX2X_DEV_ID_57840_VF 0x16ad
+
+RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57800)
+RTE_PCI_DEV_ID_DECL_BNX2XVF(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57800_VF)
+RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57711)
+RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57810)
+RTE_PCI_DEV_ID_DECL_BNX2XVF(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57810_VF)
+RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57811)
+RTE_PCI_DEV_ID_DECL_BNX2XVF(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57811_VF)
+RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57840_OBS)
+RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57840_4_10)
+RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57840_2_20)
+RTE_PCI_DEV_ID_DECL_BNX2XVF(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57840_VF)
+#ifdef RTE_LIBRTE_BNX2X_MF_SUPPORT
+RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57810_MF)
+RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57811_MF)
+RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57840_MF)
+#endif
+
+/*
+ * Undef all RTE_PCI_DEV_ID_DECL_* here.
+ */
+#undef RTE_PCI_DEV_ID_DECL_BNX2X
+#undef RTE_PCI_DEV_ID_DECL_BNX2XVF
+#undef RTE_PCI_DEV_ID_DECL_EM
+#undef RTE_PCI_DEV_ID_DECL_IGB
+#undef RTE_PCI_DEV_ID_DECL_IGBVF
+#undef RTE_PCI_DEV_ID_DECL_IXGBE
+#undef RTE_PCI_DEV_ID_DECL_IXGBEVF
+#undef RTE_PCI_DEV_ID_DECL_I40E
+#undef RTE_PCI_DEV_ID_DECL_I40EVF
+#undef RTE_PCI_DEV_ID_DECL_VIRTIO
+#undef RTE_PCI_DEV_ID_DECL_VMXNET3
+#undef RTE_PCI_DEV_ID_DECL_FM10K
+#undef RTE_PCI_DEV_ID_DECL_FM10KVF
diff --git a/lib/librte_eal/common/include/rte_per_lcore.h b/lib/librte_eal/common/include/rte_per_lcore.h
new file mode 100644
index 00000000..5434729a
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_per_lcore.h
@@ -0,0 +1,79 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_PER_LCORE_H_
+#define _RTE_PER_LCORE_H_
+
+/**
+ * @file
+ *
+ * Per-lcore variables in RTE
+ *
+ * This file defines an API for instantiating per-lcore "global
+ * variables" that are environment-specific. Note that in all
+ * environments, a "shared variable" is the default when you use a
+ * global variable.
+ *
+ * Parts of this are execution environment specific.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <pthread.h>
+
+/**
+ * Macro to define a per lcore variable "var" of type "type", don't
+ * use keywords like "static" or "volatile" in type, just prefix the
+ * whole macro.
+ */
+#define RTE_DEFINE_PER_LCORE(type, name) \
+ __thread __typeof__(type) per_lcore_##name
+
+/**
+ * Macro to declare an extern per lcore variable "var" of type "type"
+ */
+#define RTE_DECLARE_PER_LCORE(type, name) \
+ extern __thread __typeof__(type) per_lcore_##name
+
+/**
+ * Read/write the per-lcore variable value
+ */
+#define RTE_PER_LCORE(name) (per_lcore_##name)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_PER_LCORE_H_ */
diff --git a/lib/librte_eal/common/include/rte_random.h b/lib/librte_eal/common/include/rte_random.h
new file mode 100644
index 00000000..24ae8363
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_random.h
@@ -0,0 +1,91 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_RANDOM_H_
+#define _RTE_RANDOM_H_
+
+/**
+ * @file
+ *
+ * Pseudo-random Generators in RTE
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <stdlib.h>
+
+/**
+ * Seed the pseudo-random generator.
+ *
+ * The generator is automatically seeded by the EAL init with a timer
+ * value. It may need to be re-seeded by the user with a real random
+ * value.
+ *
+ * @param seedval
+ * The value of the seed.
+ */
+static inline void
+rte_srand(uint64_t seedval)
+{
+ srand48((long unsigned int)seedval);
+}
+
+/**
+ * Get a pseudo-random value.
+ *
+ * This function generates pseudo-random numbers using the linear
+ * congruential algorithm and 48-bit integer arithmetic, called twice
+ * to generate a 64-bit value.
+ *
+ * @return
+ * A pseudo-random value between 0 and (1<<64)-1.
+ */
+static inline uint64_t
+rte_rand(void)
+{
+ uint64_t val;
+ val = lrand48();
+ val <<= 32;
+ val += lrand48();
+ return val;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* _RTE_PER_LCORE_H_ */
diff --git a/lib/librte_eal/common/include/rte_string_fns.h b/lib/librte_eal/common/include/rte_string_fns.h
new file mode 100644
index 00000000..cfca2f8d
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_string_fns.h
@@ -0,0 +1,81 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @file
+ *
+ * String-related functions as replacement for libc equivalents
+ */
+
+#ifndef _RTE_STRING_FNS_H_
+#define _RTE_STRING_FNS_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Takes string "string" parameter and splits it at character "delim"
+ * up to maxtokens-1 times - to give "maxtokens" resulting tokens. Like
+ * strtok or strsep functions, this modifies its input string, by replacing
+ * instances of "delim" with '\\0'. All resultant tokens are returned in the
+ * "tokens" array which must have enough entries to hold "maxtokens".
+ *
+ * @param string
+ * The input string to be split into tokens
+ *
+ * @param stringlen
+ * The max length of the input buffer
+ *
+ * @param tokens
+ * The array to hold the pointers to the tokens in the string
+ *
+ * @param maxtokens
+ * The number of elements in the tokens array. At most, maxtokens-1 splits
+ * of the string will be done.
+ *
+ * @param delim
+ * The character on which the split of the data will be done
+ *
+ * @return
+ * The number of tokens in the tokens array.
+ */
+int
+rte_strsplit(char *string, int stringlen,
+ char **tokens, int maxtokens, char delim);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* RTE_STRING_FNS_H */
diff --git a/lib/librte_eal/common/include/rte_tailq.h b/lib/librte_eal/common/include/rte_tailq.h
new file mode 100644
index 00000000..4a686e68
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_tailq.h
@@ -0,0 +1,162 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_TAILQ_H_
+#define _RTE_TAILQ_H_
+
+/**
+ * @file
+ * Here defines rte_tailq APIs for only internal use
+ *
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/queue.h>
+#include <stdio.h>
+#include <rte_debug.h>
+
+/** dummy structure type used by the rte_tailq APIs */
+struct rte_tailq_entry {
+ TAILQ_ENTRY(rte_tailq_entry) next; /**< Pointer entries for a tailq list */
+ void *data; /**< Pointer to the data referenced by this tailq entry */
+};
+/** dummy */
+TAILQ_HEAD(rte_tailq_entry_head, rte_tailq_entry);
+
+#define RTE_TAILQ_NAMESIZE 32
+
+/**
+ * The structure defining a tailq header entry for storing
+ * in the rte_config structure in shared memory. Each tailq
+ * is identified by name.
+ * Any library storing a set of objects e.g. rings, mempools, hash-tables,
+ * is recommended to use an entry here, so as to make it easy for
+ * a multi-process app to find already-created elements in shared memory.
+ */
+struct rte_tailq_head {
+ struct rte_tailq_entry_head tailq_head; /**< NOTE: must be first element */
+ char name[RTE_TAILQ_NAMESIZE];
+};
+
+struct rte_tailq_elem {
+ /**
+ * Reference to head in shared mem, updated at init time by
+ * rte_eal_tailqs_init()
+ */
+ struct rte_tailq_head *head;
+ TAILQ_ENTRY(rte_tailq_elem) next;
+ const char name[RTE_TAILQ_NAMESIZE];
+};
+
+/**
+ * Return the first tailq entry casted to the right struct.
+ */
+#define RTE_TAILQ_CAST(tailq_entry, struct_name) \
+ (struct struct_name *)&(tailq_entry)->tailq_head
+
+/**
+ * Utility macro to make looking up a tailqueue for a particular struct easier.
+ *
+ * @param name
+ * The name of tailq
+ *
+ * @param struct_name
+ * The name of the list type we are using. (Generally this is the same as the
+ * first parameter passed to TAILQ_HEAD macro)
+ *
+ * @return
+ * The return value from rte_eal_tailq_lookup, typecast to the appropriate
+ * structure pointer type.
+ * NULL on error, since the tailq_head is the first
+ * element in the rte_tailq_head structure.
+ */
+#define RTE_TAILQ_LOOKUP(name, struct_name) \
+ RTE_TAILQ_CAST(rte_eal_tailq_lookup(name), struct_name)
+
+/**
+ * Dump tail queues to the console.
+ *
+ * @param f
+ * A pointer to a file for output
+ */
+void rte_dump_tailq(FILE *f);
+
+/**
+ * Lookup for a tail queue.
+ *
+ * Get a pointer to a tail queue header of a tail
+ * queue identified by the name given as an argument.
+ * Note: this function is not multi-thread safe, and should only be called from
+ * a single thread at a time
+ *
+ * @param name
+ * The name of the queue.
+ * @return
+ * A pointer to the tail queue head structure.
+ */
+struct rte_tailq_head *rte_eal_tailq_lookup(const char *name);
+
+/**
+ * Register a tail queue.
+ *
+ * Register a tail queue from shared memory.
+ * This function is mainly used by EAL_REGISTER_TAILQ macro which is used to
+ * register tailq from the different dpdk libraries. Since this macro is a
+ * constructor, the function has no access to dpdk shared memory, so the
+ * registered tailq can not be used before call to rte_eal_init() which calls
+ * rte_eal_tailqs_init().
+ *
+ * @param t
+ * The tailq element which contains the name of the tailq you want to
+ * create (/retrieve when in secondary process).
+ * @return
+ * 0 on success or -1 in case of an error.
+ */
+int rte_eal_tailq_register(struct rte_tailq_elem *t);
+
+#define EAL_REGISTER_TAILQ(t) \
+void tailqinitfn_ ##t(void); \
+void __attribute__((constructor, used)) tailqinitfn_ ##t(void) \
+{ \
+ if (rte_eal_tailq_register(&t) < 0) \
+ rte_panic("Cannot initialize tailq: %s\n", t.name); \
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_TAILQ_H_ */
diff --git a/lib/librte_eal/common/include/rte_time.h b/lib/librte_eal/common/include/rte_time.h
new file mode 100644
index 00000000..4b13b9c1
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_time.h
@@ -0,0 +1,122 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define NSEC_PER_SEC 1000000000L
+
+/**
+ * Structure to hold the parameters of a running cycle counter to assist
+ * in converting cycles to nanoseconds.
+ */
+struct rte_timecounter {
+ /** Last cycle counter value read. */
+ uint64_t cycle_last;
+ /** Nanoseconds count. */
+ uint64_t nsec;
+ /** Bitmask separating nanosecond and sub-nanoseconds. */
+ uint64_t nsec_mask;
+ /** Sub-nanoseconds count. */
+ uint64_t nsec_frac;
+ /** Bitmask for two's complement substraction of non-64 bit counters. */
+ uint64_t cc_mask;
+ /** Cycle to nanosecond divisor (power of two). */
+ uint32_t cc_shift;
+};
+
+/**
+ * Converts cyclecounter cycles to nanoseconds.
+ */
+static inline uint64_t
+rte_cyclecounter_cycles_to_ns(struct rte_timecounter *tc, uint64_t cycles)
+{
+ uint64_t ns;
+
+ /* Add fractional nanoseconds. */
+ ns = cycles + tc->nsec_frac;
+ tc->nsec_frac = ns & tc->nsec_mask;
+
+ /* Shift to get only nanoseconds. */
+ return ns >> tc->cc_shift;
+}
+
+/**
+ * Update the internal nanosecond count in the structure.
+ */
+static inline uint64_t
+rte_timecounter_update(struct rte_timecounter *tc, uint64_t cycle_now)
+{
+ uint64_t cycle_delta, ns_offset;
+
+ /* Calculate the delta since the last call. */
+ if (tc->cycle_last <= cycle_now)
+ cycle_delta = (cycle_now - tc->cycle_last) & tc->cc_mask;
+ else
+ /* Handle cycle counts that have wrapped around . */
+ cycle_delta = (~(tc->cycle_last - cycle_now) & tc->cc_mask) + 1;
+
+ /* Convert to nanoseconds. */
+ ns_offset = rte_cyclecounter_cycles_to_ns(tc, cycle_delta);
+
+ /* Store current cycle counter for next call. */
+ tc->cycle_last = cycle_now;
+
+ /* Update the nanosecond count. */
+ tc->nsec += ns_offset;
+
+ return tc->nsec;
+}
+
+/**
+ * Convert from timespec structure into nanosecond units.
+ */
+static inline uint64_t
+rte_timespec_to_ns(const struct timespec *ts)
+{
+ return ((uint64_t) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec;
+}
+
+/**
+ * Convert from nanosecond units into timespec structure.
+ */
+static inline struct timespec
+rte_ns_to_timespec(uint64_t nsec)
+{
+ struct timespec ts = {0, 0};
+
+ if (nsec == 0)
+ return ts;
+
+ ts.tv_sec = nsec / NSEC_PER_SEC;
+ ts.tv_nsec = nsec % NSEC_PER_SEC;
+
+ return ts;
+}
diff --git a/lib/librte_eal/common/include/rte_version.h b/lib/librte_eal/common/include/rte_version.h
new file mode 100644
index 00000000..f8ea6d73
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_version.h
@@ -0,0 +1,130 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @file
+ * Definitions of DPDK version numbers
+ */
+
+#ifndef _RTE_VERSION_H_
+#define _RTE_VERSION_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <string.h>
+#include <rte_common.h>
+
+/**
+ * String that appears before the version number
+ */
+#define RTE_VER_PREFIX "DPDK"
+
+/**
+ * Major version/year number i.e. the yy in yy.mm.z
+ */
+#define RTE_VER_YEAR 16
+
+/**
+ * Minor version/month number i.e. the mm in yy.mm.z
+ */
+#define RTE_VER_MONTH 4
+
+/**
+ * Patch level number i.e. the z in yy.mm.z
+ */
+#define RTE_VER_MINOR 0
+
+/**
+ * Extra string to be appended to version number
+ */
+#define RTE_VER_SUFFIX ""
+
+/**
+ * Patch release number
+ * 0-15 = release candidates
+ * 16 = release
+ */
+#define RTE_VER_RELEASE 16
+
+/**
+ * Macro to compute a version number usable for comparisons
+ */
+#define RTE_VERSION_NUM(a,b,c,d) ((a) << 24 | (b) << 16 | (c) << 8 | (d))
+
+/**
+ * All version numbers in one to compare with RTE_VERSION_NUM()
+ */
+#define RTE_VERSION RTE_VERSION_NUM( \
+ RTE_VER_YEAR, \
+ RTE_VER_MONTH, \
+ RTE_VER_MINOR, \
+ RTE_VER_RELEASE)
+
+/**
+ * Function returning version string
+ * @return
+ * string
+ */
+static inline const char *
+rte_version(void)
+{
+ static char version[32];
+ if (version[0] != 0)
+ return version;
+ if (strlen(RTE_VER_SUFFIX) == 0)
+ snprintf(version, sizeof(version), "%s %d.%02d.%d",
+ RTE_VER_PREFIX,
+ RTE_VER_YEAR,
+ RTE_VER_MONTH,
+ RTE_VER_MINOR);
+ else
+ snprintf(version, sizeof(version), "%s %d.%02d.%d%s%d",
+ RTE_VER_PREFIX,
+ RTE_VER_YEAR,
+ RTE_VER_MONTH,
+ RTE_VER_MINOR,
+ RTE_VER_SUFFIX,
+ RTE_VER_RELEASE < 16 ?
+ RTE_VER_RELEASE :
+ RTE_VER_RELEASE - 16);
+ return version;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* RTE_VERSION_H */
diff --git a/lib/librte_eal/common/include/rte_warnings.h b/lib/librte_eal/common/include/rte_warnings.h
new file mode 100644
index 00000000..54b545c9
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_warnings.h
@@ -0,0 +1,84 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @file
+ * Definitions of warnings for use of various insecure functions
+ */
+
+#ifndef _RTE_WARNINGS_H_
+#define _RTE_WARNINGS_H_
+
+#ifdef RTE_INSECURE_FUNCTION_WARNING
+
+/* we need to include all used standard header files so that they appear
+ * _before_ we poison the function names.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <errno.h>
+#ifdef RTE_EXEC_ENV_LINUXAPP
+#include <dirent.h>
+#endif
+
+/* the following function are deemed not fully secure for use e.g. they
+ * do not always null-terminate arguments */
+#pragma GCC poison sprintf strtok snprintf vsnprintf
+#pragma GCC poison strlen strcpy strcat
+#pragma GCC poison sscanf
+
+/* other unsafe functions may be implemented as macros so just undef them */
+#ifdef strsep
+#undef strsep
+#else
+#pragma GCC poison strsep
+#endif
+
+#ifdef strncpy
+#undef strncpy
+#else
+#pragma GCC poison strncpy
+#endif
+
+#ifdef strncat
+#undef strncat
+#else
+#pragma GCC poison strncat
+#endif
+
+#endif
+
+#endif /* RTE_WARNINGS_H */
diff --git a/lib/librte_eal/common/malloc_elem.c b/lib/librte_eal/common/malloc_elem.c
new file mode 100644
index 00000000..27e9925d
--- /dev/null
+++ b/lib/librte_eal/common/malloc_elem.c
@@ -0,0 +1,344 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <stdint.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/queue.h>
+
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_launch.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_debug.h>
+#include <rte_common.h>
+#include <rte_spinlock.h>
+
+#include "malloc_elem.h"
+#include "malloc_heap.h"
+
+#define MIN_DATA_SIZE (RTE_CACHE_LINE_SIZE)
+
+/*
+ * initialise a general malloc_elem header structure
+ */
+void
+malloc_elem_init(struct malloc_elem *elem,
+ struct malloc_heap *heap, const struct rte_memseg *ms, size_t size)
+{
+ elem->heap = heap;
+ elem->ms = ms;
+ elem->prev = NULL;
+ memset(&elem->free_list, 0, sizeof(elem->free_list));
+ elem->state = ELEM_FREE;
+ elem->size = size;
+ elem->pad = 0;
+ set_header(elem);
+ set_trailer(elem);
+}
+
+/*
+ * initialise a dummy malloc_elem header for the end-of-memseg marker
+ */
+void
+malloc_elem_mkend(struct malloc_elem *elem, struct malloc_elem *prev)
+{
+ malloc_elem_init(elem, prev->heap, prev->ms, 0);
+ elem->prev = prev;
+ elem->state = ELEM_BUSY; /* mark busy so its never merged */
+}
+
+/*
+ * calculate the starting point of where data of the requested size
+ * and alignment would fit in the current element. If the data doesn't
+ * fit, return NULL.
+ */
+static void *
+elem_start_pt(struct malloc_elem *elem, size_t size, unsigned align,
+ size_t bound)
+{
+ const size_t bmask = ~(bound - 1);
+ uintptr_t end_pt = (uintptr_t)elem +
+ elem->size - MALLOC_ELEM_TRAILER_LEN;
+ uintptr_t new_data_start = RTE_ALIGN_FLOOR((end_pt - size), align);
+ uintptr_t new_elem_start;
+
+ /* check boundary */
+ if ((new_data_start & bmask) != ((end_pt - 1) & bmask)) {
+ end_pt = RTE_ALIGN_FLOOR(end_pt, bound);
+ new_data_start = RTE_ALIGN_FLOOR((end_pt - size), align);
+ if (((end_pt - 1) & bmask) != (new_data_start & bmask))
+ return NULL;
+ }
+
+ new_elem_start = new_data_start - MALLOC_ELEM_HEADER_LEN;
+
+ /* if the new start point is before the exist start, it won't fit */
+ return (new_elem_start < (uintptr_t)elem) ? NULL : (void *)new_elem_start;
+}
+
+/*
+ * use elem_start_pt to determine if we get meet the size and
+ * alignment request from the current element
+ */
+int
+malloc_elem_can_hold(struct malloc_elem *elem, size_t size, unsigned align,
+ size_t bound)
+{
+ return elem_start_pt(elem, size, align, bound) != NULL;
+}
+
+/*
+ * split an existing element into two smaller elements at the given
+ * split_pt parameter.
+ */
+static void
+split_elem(struct malloc_elem *elem, struct malloc_elem *split_pt)
+{
+ struct malloc_elem *next_elem = RTE_PTR_ADD(elem, elem->size);
+ const size_t old_elem_size = (uintptr_t)split_pt - (uintptr_t)elem;
+ const size_t new_elem_size = elem->size - old_elem_size;
+
+ malloc_elem_init(split_pt, elem->heap, elem->ms, new_elem_size);
+ split_pt->prev = elem;
+ next_elem->prev = split_pt;
+ elem->size = old_elem_size;
+ set_trailer(elem);
+}
+
+/*
+ * Given an element size, compute its freelist index.
+ * We free an element into the freelist containing similarly-sized elements.
+ * We try to allocate elements starting with the freelist containing
+ * similarly-sized elements, and if necessary, we search freelists
+ * containing larger elements.
+ *
+ * Example element size ranges for a heap with five free lists:
+ * heap->free_head[0] - (0 , 2^8]
+ * heap->free_head[1] - (2^8 , 2^10]
+ * heap->free_head[2] - (2^10 ,2^12]
+ * heap->free_head[3] - (2^12, 2^14]
+ * heap->free_head[4] - (2^14, MAX_SIZE]
+ */
+size_t
+malloc_elem_free_list_index(size_t size)
+{
+#define MALLOC_MINSIZE_LOG2 8
+#define MALLOC_LOG2_INCREMENT 2
+
+ size_t log2;
+ size_t index;
+
+ if (size <= (1UL << MALLOC_MINSIZE_LOG2))
+ return 0;
+
+ /* Find next power of 2 >= size. */
+ log2 = sizeof(size) * 8 - __builtin_clzl(size-1);
+
+ /* Compute freelist index, based on log2(size). */
+ index = (log2 - MALLOC_MINSIZE_LOG2 + MALLOC_LOG2_INCREMENT - 1) /
+ MALLOC_LOG2_INCREMENT;
+
+ return index <= RTE_HEAP_NUM_FREELISTS-1?
+ index: RTE_HEAP_NUM_FREELISTS-1;
+}
+
+/*
+ * Add the specified element to its heap's free list.
+ */
+void
+malloc_elem_free_list_insert(struct malloc_elem *elem)
+{
+ size_t idx;
+
+ idx = malloc_elem_free_list_index(elem->size - MALLOC_ELEM_HEADER_LEN);
+ elem->state = ELEM_FREE;
+ LIST_INSERT_HEAD(&elem->heap->free_head[idx], elem, free_list);
+}
+
+/*
+ * Remove the specified element from its heap's free list.
+ */
+static void
+elem_free_list_remove(struct malloc_elem *elem)
+{
+ LIST_REMOVE(elem, free_list);
+}
+
+/*
+ * reserve a block of data in an existing malloc_elem. If the malloc_elem
+ * is much larger than the data block requested, we split the element in two.
+ * This function is only called from malloc_heap_alloc so parameter checking
+ * is not done here, as it's done there previously.
+ */
+struct malloc_elem *
+malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align,
+ size_t bound)
+{
+ struct malloc_elem *new_elem = elem_start_pt(elem, size, align, bound);
+ const size_t old_elem_size = (uintptr_t)new_elem - (uintptr_t)elem;
+ const size_t trailer_size = elem->size - old_elem_size - size -
+ MALLOC_ELEM_OVERHEAD;
+
+ elem_free_list_remove(elem);
+
+ if (trailer_size > MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
+ /* split it, too much free space after elem */
+ struct malloc_elem *new_free_elem =
+ RTE_PTR_ADD(new_elem, size + MALLOC_ELEM_OVERHEAD);
+
+ split_elem(elem, new_free_elem);
+ malloc_elem_free_list_insert(new_free_elem);
+ }
+
+ if (old_elem_size < MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
+ /* don't split it, pad the element instead */
+ elem->state = ELEM_BUSY;
+ elem->pad = old_elem_size;
+
+ /* put a dummy header in padding, to point to real element header */
+ if (elem->pad > 0){ /* pad will be at least 64-bytes, as everything
+ * is cache-line aligned */
+ new_elem->pad = elem->pad;
+ new_elem->state = ELEM_PAD;
+ new_elem->size = elem->size - elem->pad;
+ set_header(new_elem);
+ }
+
+ return new_elem;
+ }
+
+ /* we are going to split the element in two. The original element
+ * remains free, and the new element is the one allocated.
+ * Re-insert original element, in case its new size makes it
+ * belong on a different list.
+ */
+ split_elem(elem, new_elem);
+ new_elem->state = ELEM_BUSY;
+ malloc_elem_free_list_insert(elem);
+
+ return new_elem;
+}
+
+/*
+ * joing two struct malloc_elem together. elem1 and elem2 must
+ * be contiguous in memory.
+ */
+static inline void
+join_elem(struct malloc_elem *elem1, struct malloc_elem *elem2)
+{
+ struct malloc_elem *next = RTE_PTR_ADD(elem2, elem2->size);
+ elem1->size += elem2->size;
+ next->prev = elem1;
+}
+
+/*
+ * free a malloc_elem block by adding it to the free list. If the
+ * blocks either immediately before or immediately after newly freed block
+ * are also free, the blocks are merged together.
+ */
+int
+malloc_elem_free(struct malloc_elem *elem)
+{
+ if (!malloc_elem_cookies_ok(elem) || elem->state != ELEM_BUSY)
+ return -1;
+
+ rte_spinlock_lock(&(elem->heap->lock));
+ struct malloc_elem *next = RTE_PTR_ADD(elem, elem->size);
+ if (next->state == ELEM_FREE){
+ /* remove from free list, join to this one */
+ elem_free_list_remove(next);
+ join_elem(elem, next);
+ }
+
+ /* check if previous element is free, if so join with it and return,
+ * need to re-insert in free list, as that element's size is changing
+ */
+ if (elem->prev != NULL && elem->prev->state == ELEM_FREE) {
+ elem_free_list_remove(elem->prev);
+ join_elem(elem->prev, elem);
+ malloc_elem_free_list_insert(elem->prev);
+ }
+ /* otherwise add ourselves to the free list */
+ else {
+ malloc_elem_free_list_insert(elem);
+ elem->pad = 0;
+ }
+ /* decrease heap's count of allocated elements */
+ elem->heap->alloc_count--;
+ rte_spinlock_unlock(&(elem->heap->lock));
+
+ return 0;
+}
+
+/*
+ * attempt to resize a malloc_elem by expanding into any free space
+ * immediately after it in memory.
+ */
+int
+malloc_elem_resize(struct malloc_elem *elem, size_t size)
+{
+ const size_t new_size = size + MALLOC_ELEM_OVERHEAD;
+ /* if we request a smaller size, then always return ok */
+ const size_t current_size = elem->size - elem->pad;
+ if (current_size >= new_size)
+ return 0;
+
+ struct malloc_elem *next = RTE_PTR_ADD(elem, elem->size);
+ rte_spinlock_lock(&elem->heap->lock);
+ if (next ->state != ELEM_FREE)
+ goto err_return;
+ if (current_size + next->size < new_size)
+ goto err_return;
+
+ /* we now know the element fits, so remove from free list,
+ * join the two
+ */
+ elem_free_list_remove(next);
+ join_elem(elem, next);
+
+ if (elem->size - new_size >= MIN_DATA_SIZE + MALLOC_ELEM_OVERHEAD){
+ /* now we have a big block together. Lets cut it down a bit, by splitting */
+ struct malloc_elem *split_pt = RTE_PTR_ADD(elem, new_size);
+ split_pt = RTE_PTR_ALIGN_CEIL(split_pt, RTE_CACHE_LINE_SIZE);
+ split_elem(elem, split_pt);
+ malloc_elem_free_list_insert(split_pt);
+ }
+ rte_spinlock_unlock(&elem->heap->lock);
+ return 0;
+
+err_return:
+ rte_spinlock_unlock(&elem->heap->lock);
+ return -1;
+}
diff --git a/lib/librte_eal/common/malloc_elem.h b/lib/librte_eal/common/malloc_elem.h
new file mode 100644
index 00000000..f04b2d1e
--- /dev/null
+++ b/lib/librte_eal/common/malloc_elem.h
@@ -0,0 +1,192 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef MALLOC_ELEM_H_
+#define MALLOC_ELEM_H_
+
+#include <rte_memory.h>
+
+/* dummy definition of struct so we can use pointers to it in malloc_elem struct */
+struct malloc_heap;
+
+enum elem_state {
+ ELEM_FREE = 0,
+ ELEM_BUSY,
+ ELEM_PAD /* element is a padding-only header */
+};
+
+struct malloc_elem {
+ struct malloc_heap *heap;
+ struct malloc_elem *volatile prev; /* points to prev elem in memseg */
+ LIST_ENTRY(malloc_elem) free_list; /* list of free elements in heap */
+ const struct rte_memseg *ms;
+ volatile enum elem_state state;
+ uint32_t pad;
+ size_t size;
+#ifdef RTE_LIBRTE_MALLOC_DEBUG
+ uint64_t header_cookie; /* Cookie marking start of data */
+ /* trailer cookie at start + size */
+#endif
+} __rte_cache_aligned;
+
+#ifndef RTE_LIBRTE_MALLOC_DEBUG
+static const unsigned MALLOC_ELEM_TRAILER_LEN = 0;
+
+/* dummy function - just check if pointer is non-null */
+static inline int
+malloc_elem_cookies_ok(const struct malloc_elem *elem){ return elem != NULL; }
+
+/* dummy function - no header if malloc_debug is not enabled */
+static inline void
+set_header(struct malloc_elem *elem __rte_unused){ }
+
+/* dummy function - no trailer if malloc_debug is not enabled */
+static inline void
+set_trailer(struct malloc_elem *elem __rte_unused){ }
+
+
+#else
+static const unsigned MALLOC_ELEM_TRAILER_LEN = RTE_CACHE_LINE_SIZE;
+
+#define MALLOC_HEADER_COOKIE 0xbadbadbadadd2e55ULL /**< Header cookie. */
+#define MALLOC_TRAILER_COOKIE 0xadd2e55badbadbadULL /**< Trailer cookie.*/
+
+/* define macros to make referencing the header and trailer cookies easier */
+#define MALLOC_ELEM_TRAILER(elem) (*((uint64_t*)RTE_PTR_ADD(elem, \
+ elem->size - MALLOC_ELEM_TRAILER_LEN)))
+#define MALLOC_ELEM_HEADER(elem) (elem->header_cookie)
+
+static inline void
+set_header(struct malloc_elem *elem)
+{
+ if (elem != NULL)
+ MALLOC_ELEM_HEADER(elem) = MALLOC_HEADER_COOKIE;
+}
+
+static inline void
+set_trailer(struct malloc_elem *elem)
+{
+ if (elem != NULL)
+ MALLOC_ELEM_TRAILER(elem) = MALLOC_TRAILER_COOKIE;
+}
+
+/* check that the header and trailer cookies are set correctly */
+static inline int
+malloc_elem_cookies_ok(const struct malloc_elem *elem)
+{
+ return elem != NULL &&
+ MALLOC_ELEM_HEADER(elem) == MALLOC_HEADER_COOKIE &&
+ MALLOC_ELEM_TRAILER(elem) == MALLOC_TRAILER_COOKIE;
+}
+
+#endif
+
+static const unsigned MALLOC_ELEM_HEADER_LEN = sizeof(struct malloc_elem);
+#define MALLOC_ELEM_OVERHEAD (MALLOC_ELEM_HEADER_LEN + MALLOC_ELEM_TRAILER_LEN)
+
+/*
+ * Given a pointer to the start of a memory block returned by malloc, get
+ * the actual malloc_elem header for that block.
+ */
+static inline struct malloc_elem *
+malloc_elem_from_data(const void *data)
+{
+ if (data == NULL)
+ return NULL;
+
+ struct malloc_elem *elem = RTE_PTR_SUB(data, MALLOC_ELEM_HEADER_LEN);
+ if (!malloc_elem_cookies_ok(elem))
+ return NULL;
+ return elem->state != ELEM_PAD ? elem: RTE_PTR_SUB(elem, elem->pad);
+}
+
+/*
+ * initialise a malloc_elem header
+ */
+void
+malloc_elem_init(struct malloc_elem *elem,
+ struct malloc_heap *heap,
+ const struct rte_memseg *ms,
+ size_t size);
+
+/*
+ * initialise a dummy malloc_elem header for the end-of-memseg marker
+ */
+void
+malloc_elem_mkend(struct malloc_elem *elem,
+ struct malloc_elem *prev_free);
+
+/*
+ * return true if the current malloc_elem can hold a block of data
+ * of the requested size and with the requested alignment
+ */
+int
+malloc_elem_can_hold(struct malloc_elem *elem, size_t size,
+ unsigned align, size_t bound);
+
+/*
+ * reserve a block of data in an existing malloc_elem. If the malloc_elem
+ * is much larger than the data block requested, we split the element in two.
+ */
+struct malloc_elem *
+malloc_elem_alloc(struct malloc_elem *elem, size_t size,
+ unsigned align, size_t bound);
+
+/*
+ * free a malloc_elem block by adding it to the free list. If the
+ * blocks either immediately before or immediately after newly freed block
+ * are also free, the blocks are merged together.
+ */
+int
+malloc_elem_free(struct malloc_elem *elem);
+
+/*
+ * attempt to resize a malloc_elem by expanding into any free space
+ * immediately after it in memory.
+ */
+int
+malloc_elem_resize(struct malloc_elem *elem, size_t size);
+
+/*
+ * Given an element size, compute its freelist index.
+ */
+size_t
+malloc_elem_free_list_index(size_t size);
+
+/*
+ * Add element to its heap's free list.
+ */
+void
+malloc_elem_free_list_insert(struct malloc_elem *elem);
+
+#endif /* MALLOC_ELEM_H_ */
diff --git a/lib/librte_eal/common/malloc_heap.c b/lib/librte_eal/common/malloc_heap.c
new file mode 100644
index 00000000..763fa324
--- /dev/null
+++ b/lib/librte_eal/common/malloc_heap.c
@@ -0,0 +1,236 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <stdint.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <sys/queue.h>
+
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_launch.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_common.h>
+#include <rte_string_fns.h>
+#include <rte_spinlock.h>
+#include <rte_memcpy.h>
+#include <rte_atomic.h>
+
+#include "malloc_elem.h"
+#include "malloc_heap.h"
+
+static unsigned
+check_hugepage_sz(unsigned flags, uint64_t hugepage_sz)
+{
+ unsigned check_flag = 0;
+
+ if (!(flags & ~RTE_MEMZONE_SIZE_HINT_ONLY))
+ return 1;
+
+ switch (hugepage_sz) {
+ case RTE_PGSIZE_256K:
+ check_flag = RTE_MEMZONE_256KB;
+ break;
+ case RTE_PGSIZE_2M:
+ check_flag = RTE_MEMZONE_2MB;
+ break;
+ case RTE_PGSIZE_16M:
+ check_flag = RTE_MEMZONE_16MB;
+ break;
+ case RTE_PGSIZE_256M:
+ check_flag = RTE_MEMZONE_256MB;
+ break;
+ case RTE_PGSIZE_512M:
+ check_flag = RTE_MEMZONE_512MB;
+ break;
+ case RTE_PGSIZE_1G:
+ check_flag = RTE_MEMZONE_1GB;
+ break;
+ case RTE_PGSIZE_4G:
+ check_flag = RTE_MEMZONE_4GB;
+ break;
+ case RTE_PGSIZE_16G:
+ check_flag = RTE_MEMZONE_16GB;
+ }
+
+ return check_flag & flags;
+}
+
+/*
+ * Expand the heap with a memseg.
+ * This reserves the zone and sets a dummy malloc_elem header at the end
+ * to prevent overflow. The rest of the zone is added to free list as a single
+ * large free block
+ */
+static void
+malloc_heap_add_memseg(struct malloc_heap *heap, struct rte_memseg *ms)
+{
+ /* allocate the memory block headers, one at end, one at start */
+ struct malloc_elem *start_elem = (struct malloc_elem *)ms->addr;
+ struct malloc_elem *end_elem = RTE_PTR_ADD(ms->addr,
+ ms->len - MALLOC_ELEM_OVERHEAD);
+ end_elem = RTE_PTR_ALIGN_FLOOR(end_elem, RTE_CACHE_LINE_SIZE);
+ const size_t elem_size = (uintptr_t)end_elem - (uintptr_t)start_elem;
+
+ malloc_elem_init(start_elem, heap, ms, elem_size);
+ malloc_elem_mkend(end_elem, start_elem);
+ malloc_elem_free_list_insert(start_elem);
+
+ heap->total_size += elem_size;
+}
+
+/*
+ * Iterates through the freelist for a heap to find a free element
+ * which can store data of the required size and with the requested alignment.
+ * If size is 0, find the biggest available elem.
+ * Returns null on failure, or pointer to element on success.
+ */
+static struct malloc_elem *
+find_suitable_element(struct malloc_heap *heap, size_t size,
+ unsigned flags, size_t align, size_t bound)
+{
+ size_t idx;
+ struct malloc_elem *elem, *alt_elem = NULL;
+
+ for (idx = malloc_elem_free_list_index(size);
+ idx < RTE_HEAP_NUM_FREELISTS; idx++) {
+ for (elem = LIST_FIRST(&heap->free_head[idx]);
+ !!elem; elem = LIST_NEXT(elem, free_list)) {
+ if (malloc_elem_can_hold(elem, size, align, bound)) {
+ if (check_hugepage_sz(flags, elem->ms->hugepage_sz))
+ return elem;
+ if (alt_elem == NULL)
+ alt_elem = elem;
+ }
+ }
+ }
+
+ if ((alt_elem != NULL) && (flags & RTE_MEMZONE_SIZE_HINT_ONLY))
+ return alt_elem;
+
+ return NULL;
+}
+
+/*
+ * Main function to allocate a block of memory from the heap.
+ * It locks the free list, scans it, and adds a new memseg if the
+ * scan fails. Once the new memseg is added, it re-scans and should return
+ * the new element after releasing the lock.
+ */
+void *
+malloc_heap_alloc(struct malloc_heap *heap,
+ const char *type __attribute__((unused)), size_t size, unsigned flags,
+ size_t align, size_t bound)
+{
+ struct malloc_elem *elem;
+
+ size = RTE_CACHE_LINE_ROUNDUP(size);
+ align = RTE_CACHE_LINE_ROUNDUP(align);
+
+ rte_spinlock_lock(&heap->lock);
+
+ elem = find_suitable_element(heap, size, flags, align, bound);
+ if (elem != NULL) {
+ elem = malloc_elem_alloc(elem, size, align, bound);
+ /* increase heap's count of allocated elements */
+ heap->alloc_count++;
+ }
+ rte_spinlock_unlock(&heap->lock);
+
+ return elem == NULL ? NULL : (void *)(&elem[1]);
+}
+
+/*
+ * Function to retrieve data for heap on given socket
+ */
+int
+malloc_heap_get_stats(const struct malloc_heap *heap,
+ struct rte_malloc_socket_stats *socket_stats)
+{
+ size_t idx;
+ struct malloc_elem *elem;
+
+ /* Initialise variables for heap */
+ socket_stats->free_count = 0;
+ socket_stats->heap_freesz_bytes = 0;
+ socket_stats->greatest_free_size = 0;
+
+ /* Iterate through free list */
+ for (idx = 0; idx < RTE_HEAP_NUM_FREELISTS; idx++) {
+ for (elem = LIST_FIRST(&heap->free_head[idx]);
+ !!elem; elem = LIST_NEXT(elem, free_list))
+ {
+ socket_stats->free_count++;
+ socket_stats->heap_freesz_bytes += elem->size;
+ if (elem->size > socket_stats->greatest_free_size)
+ socket_stats->greatest_free_size = elem->size;
+ }
+ }
+ /* Get stats on overall heap and allocated memory on this heap */
+ socket_stats->heap_totalsz_bytes = heap->total_size;
+ socket_stats->heap_allocsz_bytes = (socket_stats->heap_totalsz_bytes -
+ socket_stats->heap_freesz_bytes);
+ socket_stats->alloc_count = heap->alloc_count;
+ return 0;
+}
+
+int
+rte_eal_malloc_heap_init(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ unsigned ms_cnt;
+ struct rte_memseg *ms;
+
+ if (mcfg == NULL)
+ return -1;
+
+ for (ms = &mcfg->memseg[0], ms_cnt = 0;
+ (ms_cnt < RTE_MAX_MEMSEG) && (ms->len > 0);
+ ms_cnt++, ms++) {
+#ifdef RTE_LIBRTE_IVSHMEM
+ /*
+ * if segment has ioremap address set, it's an IVSHMEM segment and
+ * it is not memory to allocate from.
+ */
+ if (ms->ioremap_addr != 0)
+ continue;
+#endif
+ malloc_heap_add_memseg(&mcfg->malloc_heaps[ms->socket_id], ms);
+ }
+
+ return 0;
+}
diff --git a/lib/librte_eal/common/malloc_heap.h b/lib/librte_eal/common/malloc_heap.h
new file mode 100644
index 00000000..3ccbef0f
--- /dev/null
+++ b/lib/librte_eal/common/malloc_heap.h
@@ -0,0 +1,70 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef MALLOC_HEAP_H_
+#define MALLOC_HEAP_H_
+
+#include <rte_malloc.h>
+#include <rte_malloc_heap.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static inline unsigned
+malloc_get_numa_socket(void)
+{
+ unsigned socket_id = rte_socket_id();
+
+ if (socket_id == (unsigned)SOCKET_ID_ANY)
+ return 0;
+
+ return socket_id;
+}
+
+void *
+malloc_heap_alloc(struct malloc_heap *heap, const char *type, size_t size,
+ unsigned flags, size_t align, size_t bound);
+
+int
+malloc_heap_get_stats(const struct malloc_heap *heap,
+ struct rte_malloc_socket_stats *socket_stats);
+
+int
+rte_eal_malloc_heap_init(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* MALLOC_HEAP_H_ */
diff --git a/lib/librte_eal/common/rte_keepalive.c b/lib/librte_eal/common/rte_keepalive.c
new file mode 100644
index 00000000..23363ec1
--- /dev/null
+++ b/lib/librte_eal/common/rte_keepalive.c
@@ -0,0 +1,149 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015-2016 Intel Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <inttypes.h>
+
+#include <rte_common.h>
+#include <rte_cycles.h>
+#include <rte_lcore.h>
+#include <rte_log.h>
+#include <rte_keepalive.h>
+#include <rte_malloc.h>
+#include <rte_cycles.h>
+
+struct rte_keepalive {
+ /** Core Liveness. */
+ enum rte_keepalive_state {
+ ALIVE = 1,
+ MISSING = 0,
+ DEAD = 2,
+ GONE = 3
+ } __rte_cache_aligned state_flags[RTE_KEEPALIVE_MAXCORES];
+
+ /** Last-seen-alive timestamps */
+ uint64_t last_alive[RTE_KEEPALIVE_MAXCORES];
+
+ /**
+ * Cores to check.
+ * Indexed by core id, non-zero if the core should be checked.
+ */
+ uint8_t active_cores[RTE_KEEPALIVE_MAXCORES];
+
+ /** Dead core handler. */
+ rte_keepalive_failure_callback_t callback;
+
+ /**
+ * Dead core handler app data.
+ * Pointer is passed to dead core handler.
+ */
+ void *callback_data;
+ uint64_t tsc_initial;
+ uint64_t tsc_mhz;
+};
+
+static void
+print_trace(const char *msg, struct rte_keepalive *keepcfg, int idx_core)
+{
+ RTE_LOG(INFO, EAL, "%sLast seen %" PRId64 "ms ago.\n",
+ msg,
+ ((rte_rdtsc() - keepcfg->last_alive[idx_core])*1000)
+ / rte_get_tsc_hz()
+ );
+}
+
+void
+rte_keepalive_dispatch_pings(__rte_unused void *ptr_timer,
+ void *ptr_data)
+{
+ struct rte_keepalive *keepcfg = ptr_data;
+ int idx_core;
+
+ for (idx_core = 0; idx_core < RTE_KEEPALIVE_MAXCORES; idx_core++) {
+ if (keepcfg->active_cores[idx_core] == 0)
+ continue;
+
+ switch (keepcfg->state_flags[idx_core]) {
+ case ALIVE: /* Alive */
+ keepcfg->state_flags[idx_core] = MISSING;
+ keepcfg->last_alive[idx_core] = rte_rdtsc();
+ break;
+ case MISSING: /* MIA */
+ print_trace("Core MIA. ", keepcfg, idx_core);
+ keepcfg->state_flags[idx_core] = DEAD;
+ break;
+ case DEAD: /* Dead */
+ keepcfg->state_flags[idx_core] = GONE;
+ print_trace("Core died. ", keepcfg, idx_core);
+ if (keepcfg->callback)
+ keepcfg->callback(
+ keepcfg->callback_data,
+ idx_core
+ );
+ break;
+ case GONE: /* Buried */
+ break;
+ }
+ }
+}
+
+struct rte_keepalive *
+rte_keepalive_create(rte_keepalive_failure_callback_t callback,
+ void *data)
+{
+ struct rte_keepalive *keepcfg;
+
+ keepcfg = rte_zmalloc("RTE_EAL_KEEPALIVE",
+ sizeof(struct rte_keepalive),
+ RTE_CACHE_LINE_SIZE);
+ if (keepcfg != NULL) {
+ keepcfg->callback = callback;
+ keepcfg->callback_data = data;
+ keepcfg->tsc_initial = rte_rdtsc();
+ keepcfg->tsc_mhz = rte_get_tsc_hz() / 1000;
+ }
+ return keepcfg;
+}
+
+void
+rte_keepalive_register_core(struct rte_keepalive *keepcfg, const int id_core)
+{
+ if (id_core < RTE_KEEPALIVE_MAXCORES) {
+ keepcfg->active_cores[id_core] = 1;
+ keepcfg->last_alive[id_core] = rte_rdtsc();
+ }
+}
+
+void
+rte_keepalive_mark_alive(struct rte_keepalive *keepcfg)
+{
+ keepcfg->state_flags[rte_lcore_id()] = ALIVE;
+}
diff --git a/lib/librte_eal/common/rte_malloc.c b/lib/librte_eal/common/rte_malloc.c
new file mode 100644
index 00000000..47deb007
--- /dev/null
+++ b/lib/librte_eal/common/rte_malloc.c
@@ -0,0 +1,262 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/queue.h>
+
+#include <rte_memcpy.h>
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_branch_prediction.h>
+#include <rte_debug.h>
+#include <rte_launch.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_common.h>
+#include <rte_spinlock.h>
+
+#include <rte_malloc.h>
+#include "malloc_elem.h"
+#include "malloc_heap.h"
+
+
+/* Free the memory space back to heap */
+void rte_free(void *addr)
+{
+ if (addr == NULL) return;
+ if (malloc_elem_free(malloc_elem_from_data(addr)) < 0)
+ rte_panic("Fatal error: Invalid memory\n");
+}
+
+/*
+ * Allocate memory on specified heap.
+ */
+void *
+rte_malloc_socket(const char *type, size_t size, unsigned align, int socket_arg)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int socket, i;
+ void *ret;
+
+ /* return NULL if size is 0 or alignment is not power-of-2 */
+ if (size == 0 || (align && !rte_is_power_of_2(align)))
+ return NULL;
+
+ if (!rte_eal_has_hugepages())
+ socket_arg = SOCKET_ID_ANY;
+
+ if (socket_arg == SOCKET_ID_ANY)
+ socket = malloc_get_numa_socket();
+ else
+ socket = socket_arg;
+
+ /* Check socket parameter */
+ if (socket >= RTE_MAX_NUMA_NODES)
+ return NULL;
+
+ ret = malloc_heap_alloc(&mcfg->malloc_heaps[socket], type,
+ size, 0, align == 0 ? 1 : align, 0);
+ if (ret != NULL || socket_arg != SOCKET_ID_ANY)
+ return ret;
+
+ /* try other heaps */
+ for (i = 0; i < RTE_MAX_NUMA_NODES; i++) {
+ /* we already tried this one */
+ if (i == socket)
+ continue;
+
+ ret = malloc_heap_alloc(&mcfg->malloc_heaps[i], type,
+ size, 0, align == 0 ? 1 : align, 0);
+ if (ret != NULL)
+ return ret;
+ }
+
+ return NULL;
+}
+
+/*
+ * Allocate memory on default heap.
+ */
+void *
+rte_malloc(const char *type, size_t size, unsigned align)
+{
+ return rte_malloc_socket(type, size, align, SOCKET_ID_ANY);
+}
+
+/*
+ * Allocate zero'd memory on specified heap.
+ */
+void *
+rte_zmalloc_socket(const char *type, size_t size, unsigned align, int socket)
+{
+ void *ptr = rte_malloc_socket(type, size, align, socket);
+
+ if (ptr != NULL)
+ memset(ptr, 0, size);
+ return ptr;
+}
+
+/*
+ * Allocate zero'd memory on default heap.
+ */
+void *
+rte_zmalloc(const char *type, size_t size, unsigned align)
+{
+ return rte_zmalloc_socket(type, size, align, SOCKET_ID_ANY);
+}
+
+/*
+ * Allocate zero'd memory on specified heap.
+ */
+void *
+rte_calloc_socket(const char *type, size_t num, size_t size, unsigned align, int socket)
+{
+ return rte_zmalloc_socket(type, num * size, align, socket);
+}
+
+/*
+ * Allocate zero'd memory on default heap.
+ */
+void *
+rte_calloc(const char *type, size_t num, size_t size, unsigned align)
+{
+ return rte_zmalloc(type, num * size, align);
+}
+
+/*
+ * Resize allocated memory.
+ */
+void *
+rte_realloc(void *ptr, size_t size, unsigned align)
+{
+ if (ptr == NULL)
+ return rte_malloc(NULL, size, align);
+
+ struct malloc_elem *elem = malloc_elem_from_data(ptr);
+ if (elem == NULL)
+ rte_panic("Fatal error: memory corruption detected\n");
+
+ size = RTE_CACHE_LINE_ROUNDUP(size), align = RTE_CACHE_LINE_ROUNDUP(align);
+ /* check alignment matches first, and if ok, see if we can resize block */
+ if (RTE_PTR_ALIGN(ptr,align) == ptr &&
+ malloc_elem_resize(elem, size) == 0)
+ return ptr;
+
+ /* either alignment is off, or we have no room to expand,
+ * so move data. */
+ void *new_ptr = rte_malloc(NULL, size, align);
+ if (new_ptr == NULL)
+ return NULL;
+ const unsigned old_size = elem->size - MALLOC_ELEM_OVERHEAD;
+ rte_memcpy(new_ptr, ptr, old_size < size ? old_size : size);
+ rte_free(ptr);
+
+ return new_ptr;
+}
+
+int
+rte_malloc_validate(const void *ptr, size_t *size)
+{
+ const struct malloc_elem *elem = malloc_elem_from_data(ptr);
+ if (!malloc_elem_cookies_ok(elem))
+ return -1;
+ if (size != NULL)
+ *size = elem->size - elem->pad - MALLOC_ELEM_OVERHEAD;
+ return 0;
+}
+
+/*
+ * Function to retrieve data for heap on given socket
+ */
+int
+rte_malloc_get_socket_stats(int socket,
+ struct rte_malloc_socket_stats *socket_stats)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+
+ if (socket >= RTE_MAX_NUMA_NODES || socket < 0)
+ return -1;
+
+ return malloc_heap_get_stats(&mcfg->malloc_heaps[socket], socket_stats);
+}
+
+/*
+ * Print stats on memory type. If type is NULL, info on all types is printed
+ */
+void
+rte_malloc_dump_stats(FILE *f, __rte_unused const char *type)
+{
+ unsigned int socket;
+ struct rte_malloc_socket_stats sock_stats;
+ /* Iterate through all initialised heaps */
+ for (socket=0; socket< RTE_MAX_NUMA_NODES; socket++) {
+ if ((rte_malloc_get_socket_stats(socket, &sock_stats) < 0))
+ continue;
+
+ fprintf(f, "Socket:%u\n", socket);
+ fprintf(f, "\tHeap_size:%zu,\n", sock_stats.heap_totalsz_bytes);
+ fprintf(f, "\tFree_size:%zu,\n", sock_stats.heap_freesz_bytes);
+ fprintf(f, "\tAlloc_size:%zu,\n", sock_stats.heap_allocsz_bytes);
+ fprintf(f, "\tGreatest_free_size:%zu,\n",
+ sock_stats.greatest_free_size);
+ fprintf(f, "\tAlloc_count:%u,\n",sock_stats.alloc_count);
+ fprintf(f, "\tFree_count:%u,\n", sock_stats.free_count);
+ }
+ return;
+}
+
+/*
+ * TODO: Set limit to memory that can be allocated to memory type
+ */
+int
+rte_malloc_set_limit(__rte_unused const char *type,
+ __rte_unused size_t max)
+{
+ return 0;
+}
+
+/*
+ * Return the physical address of a virtual address obtained through rte_malloc
+ */
+phys_addr_t
+rte_malloc_virt2phy(const void *addr)
+{
+ const struct malloc_elem *elem = malloc_elem_from_data(addr);
+ if (elem == NULL)
+ return 0;
+ return elem->ms->phys_addr + ((uintptr_t)addr - (uintptr_t)elem->ms->addr);
+}
diff --git a/lib/librte_eal/linuxapp/Makefile b/lib/librte_eal/linuxapp/Makefile
new file mode 100644
index 00000000..20d2a916
--- /dev/null
+++ b/lib/librte_eal/linuxapp/Makefile
@@ -0,0 +1,39 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+DIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal
+DIRS-$(CONFIG_RTE_EAL_IGB_UIO) += igb_uio
+DIRS-$(CONFIG_RTE_KNI_KMOD) += kni
+DIRS-$(CONFIG_RTE_LIBRTE_XEN_DOM0) += xen_dom0
+
+include $(RTE_SDK)/mk/rte.subdir.mk
diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile
new file mode 100644
index 00000000..e1093619
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/Makefile
@@ -0,0 +1,139 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+LIB = librte_eal.a
+
+ARCH_DIR ?= $(RTE_ARCH)
+EXPORT_MAP := rte_eal_version.map
+VPATH += $(RTE_SDK)/lib/librte_eal/common/arch/$(ARCH_DIR)
+
+LIBABIVER := 2
+
+VPATH += $(RTE_SDK)/lib/librte_eal/common
+
+CFLAGS += -I$(SRCDIR)/include
+CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common
+CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common/include
+CFLAGS += -I$(RTE_SDK)/lib/librte_ring
+CFLAGS += -I$(RTE_SDK)/lib/librte_mempool
+CFLAGS += -I$(RTE_SDK)/lib/librte_ivshmem
+CFLAGS += $(WERROR_FLAGS) -O3
+
+LDLIBS += -ldl
+LDLIBS += -lpthread
+LDLIBS += -lgcc_s
+LDLIBS += -lrt
+
+# specific to linuxapp exec-env
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) := eal.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_hugepage_info.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_memory.c
+ifeq ($(CONFIG_RTE_LIBRTE_XEN_DOM0),y)
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_xen_memory.c
+endif
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_thread.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_log.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_pci.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_pci_uio.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_pci_vfio.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_pci_vfio_mp_sync.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_debug.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_lcore.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_timer.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_interrupts.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_alarm.c
+ifeq ($(CONFIG_RTE_LIBRTE_IVSHMEM),y)
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_ivshmem.c
+endif
+
+# from common dir
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_lcore.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_timer.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_memzone.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_log.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_launch.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_pci.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_pci_uio.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_memory.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_tailqs.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_errno.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_cpuflags.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_string_fns.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_hexdump.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_devargs.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_dev.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_options.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_thread.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_proc.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_malloc.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += malloc_elem.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += malloc_heap.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_keepalive.c
+
+# from arch dir
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_cpuflags.c
+
+CFLAGS_eal_common_cpuflags.o := $(CPUFLAGS_LIST)
+
+CFLAGS_eal.o := -D_GNU_SOURCE
+CFLAGS_eal_interrupts.o := -D_GNU_SOURCE
+CFLAGS_eal_pci_vfio_mp_sync.o := -D_GNU_SOURCE
+CFLAGS_eal_timer.o := -D_GNU_SOURCE
+CFLAGS_eal_lcore.o := -D_GNU_SOURCE
+CFLAGS_eal_thread.o := -D_GNU_SOURCE
+CFLAGS_eal_log.o := -D_GNU_SOURCE
+CFLAGS_eal_common_log.o := -D_GNU_SOURCE
+CFLAGS_eal_hugepage_info.o := -D_GNU_SOURCE
+CFLAGS_eal_pci.o := -D_GNU_SOURCE
+CFLAGS_eal_pci_uio.o := -D_GNU_SOURCE
+CFLAGS_eal_pci_vfio.o := -D_GNU_SOURCE
+CFLAGS_eal_common_whitelist.o := -D_GNU_SOURCE
+CFLAGS_eal_common_options.o := -D_GNU_SOURCE
+CFLAGS_eal_common_thread.o := -D_GNU_SOURCE
+CFLAGS_eal_common_lcore.o := -D_GNU_SOURCE
+
+# workaround for a gcc bug with noreturn attribute
+# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
+ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
+CFLAGS_eal_thread.o += -Wno-return-type
+endif
+
+INC := rte_interrupts.h rte_kni_common.h rte_dom0_common.h
+
+SYMLINK-$(CONFIG_RTE_EXEC_ENV_LINUXAPP)-include/exec-env := \
+ $(addprefix include/exec-env/,$(INC))
+
+DEPDIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += lib/librte_eal/common
+DEPDIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += lib/librte_eal/common/arch/$(ARCH_DIR)
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
new file mode 100644
index 00000000..8aafd519
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -0,0 +1,936 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ * Copyright(c) 2012-2014 6WIND S.A.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <syslog.h>
+#include <getopt.h>
+#include <sys/file.h>
+#include <fcntl.h>
+#include <stddef.h>
+#include <errno.h>
+#include <limits.h>
+#include <errno.h>
+#include <sys/mman.h>
+#include <sys/queue.h>
+#include <sys/stat.h>
+#if defined(RTE_ARCH_X86)
+#include <sys/io.h>
+#endif
+
+#include <rte_common.h>
+#include <rte_debug.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_launch.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_log.h>
+#include <rte_random.h>
+#include <rte_cycles.h>
+#include <rte_string_fns.h>
+#include <rte_cpuflags.h>
+#include <rte_interrupts.h>
+#include <rte_pci.h>
+#include <rte_devargs.h>
+#include <rte_common.h>
+#include <rte_version.h>
+#include <rte_atomic.h>
+#include <malloc_heap.h>
+
+#include "eal_private.h"
+#include "eal_thread.h"
+#include "eal_internal_cfg.h"
+#include "eal_filesystem.h"
+#include "eal_hugepages.h"
+#include "eal_options.h"
+
+#define MEMSIZE_IF_NO_HUGE_PAGE (64ULL * 1024ULL * 1024ULL)
+
+#define SOCKET_MEM_STRLEN (RTE_MAX_NUMA_NODES * 10)
+
+/* Allow the application to print its usage message too if set */
+static rte_usage_hook_t rte_application_usage_hook = NULL;
+
+/* early configuration structure, when memory config is not mmapped */
+static struct rte_mem_config early_mem_config;
+
+/* define fd variable here, because file needs to be kept open for the
+ * duration of the program, as we hold a write lock on it in the primary proc */
+static int mem_cfg_fd = -1;
+
+static struct flock wr_lock = {
+ .l_type = F_WRLCK,
+ .l_whence = SEEK_SET,
+ .l_start = offsetof(struct rte_mem_config, memseg),
+ .l_len = sizeof(early_mem_config.memseg),
+};
+
+/* Address of global and public configuration */
+static struct rte_config rte_config = {
+ .mem_config = &early_mem_config,
+};
+
+/* internal configuration (per-core) */
+struct lcore_config lcore_config[RTE_MAX_LCORE];
+
+/* internal configuration */
+struct internal_config internal_config;
+
+/* used by rte_rdtsc() */
+int rte_cycles_vmware_tsc_map;
+
+/* Return a pointer to the configuration structure */
+struct rte_config *
+rte_eal_get_configuration(void)
+{
+ return &rte_config;
+}
+
+/* parse a sysfs (or other) file containing one integer value */
+int
+eal_parse_sysfs_value(const char *filename, unsigned long *val)
+{
+ FILE *f;
+ char buf[BUFSIZ];
+ char *end = NULL;
+
+ if ((f = fopen(filename, "r")) == NULL) {
+ RTE_LOG(ERR, EAL, "%s(): cannot open sysfs value %s\n",
+ __func__, filename);
+ return -1;
+ }
+
+ if (fgets(buf, sizeof(buf), f) == NULL) {
+ RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n",
+ __func__, filename);
+ fclose(f);
+ return -1;
+ }
+ *val = strtoul(buf, &end, 0);
+ if ((buf[0] == '\0') || (end == NULL) || (*end != '\n')) {
+ RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs value %s\n",
+ __func__, filename);
+ fclose(f);
+ return -1;
+ }
+ fclose(f);
+ return 0;
+}
+
+
+/* create memory configuration in shared/mmap memory. Take out
+ * a write lock on the memsegs, so we can auto-detect primary/secondary.
+ * This means we never close the file while running (auto-close on exit).
+ * We also don't lock the whole file, so that in future we can use read-locks
+ * on other parts, e.g. memzones, to detect if there are running secondary
+ * processes. */
+static void
+rte_eal_config_create(void)
+{
+ void *rte_mem_cfg_addr;
+ int retval;
+
+ const char *pathname = eal_runtime_config_path();
+
+ if (internal_config.no_shconf)
+ return;
+
+ /* map the config before hugepage address so that we don't waste a page */
+ if (internal_config.base_virtaddr != 0)
+ rte_mem_cfg_addr = (void *)
+ RTE_ALIGN_FLOOR(internal_config.base_virtaddr -
+ sizeof(struct rte_mem_config), sysconf(_SC_PAGE_SIZE));
+ else
+ rte_mem_cfg_addr = NULL;
+
+ if (mem_cfg_fd < 0){
+ mem_cfg_fd = open(pathname, O_RDWR | O_CREAT, 0660);
+ if (mem_cfg_fd < 0)
+ rte_panic("Cannot open '%s' for rte_mem_config\n", pathname);
+ }
+
+ retval = ftruncate(mem_cfg_fd, sizeof(*rte_config.mem_config));
+ if (retval < 0){
+ close(mem_cfg_fd);
+ rte_panic("Cannot resize '%s' for rte_mem_config\n", pathname);
+ }
+
+ retval = fcntl(mem_cfg_fd, F_SETLK, &wr_lock);
+ if (retval < 0){
+ close(mem_cfg_fd);
+ rte_exit(EXIT_FAILURE, "Cannot create lock on '%s'. Is another primary "
+ "process running?\n", pathname);
+ }
+
+ rte_mem_cfg_addr = mmap(rte_mem_cfg_addr, sizeof(*rte_config.mem_config),
+ PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0);
+
+ if (rte_mem_cfg_addr == MAP_FAILED){
+ rte_panic("Cannot mmap memory for rte_config\n");
+ }
+ memcpy(rte_mem_cfg_addr, &early_mem_config, sizeof(early_mem_config));
+ rte_config.mem_config = (struct rte_mem_config *) rte_mem_cfg_addr;
+
+ /* store address of the config in the config itself so that secondary
+ * processes could later map the config into this exact location */
+ rte_config.mem_config->mem_cfg_addr = (uintptr_t) rte_mem_cfg_addr;
+
+}
+
+/* attach to an existing shared memory config */
+static void
+rte_eal_config_attach(void)
+{
+ struct rte_mem_config *mem_config;
+
+ const char *pathname = eal_runtime_config_path();
+
+ if (internal_config.no_shconf)
+ return;
+
+ if (mem_cfg_fd < 0){
+ mem_cfg_fd = open(pathname, O_RDWR);
+ if (mem_cfg_fd < 0)
+ rte_panic("Cannot open '%s' for rte_mem_config\n", pathname);
+ }
+
+ /* map it as read-only first */
+ mem_config = (struct rte_mem_config *) mmap(NULL, sizeof(*mem_config),
+ PROT_READ, MAP_SHARED, mem_cfg_fd, 0);
+ if (mem_config == MAP_FAILED)
+ rte_panic("Cannot mmap memory for rte_config\n");
+
+ rte_config.mem_config = mem_config;
+}
+
+/* reattach the shared config at exact memory location primary process has it */
+static void
+rte_eal_config_reattach(void)
+{
+ struct rte_mem_config *mem_config;
+ void *rte_mem_cfg_addr;
+
+ if (internal_config.no_shconf)
+ return;
+
+ /* save the address primary process has mapped shared config to */
+ rte_mem_cfg_addr = (void *) (uintptr_t) rte_config.mem_config->mem_cfg_addr;
+
+ /* unmap original config */
+ munmap(rte_config.mem_config, sizeof(struct rte_mem_config));
+
+ /* remap the config at proper address */
+ mem_config = (struct rte_mem_config *) mmap(rte_mem_cfg_addr,
+ sizeof(*mem_config), PROT_READ | PROT_WRITE, MAP_SHARED,
+ mem_cfg_fd, 0);
+ close(mem_cfg_fd);
+ if (mem_config == MAP_FAILED || mem_config != rte_mem_cfg_addr)
+ rte_panic("Cannot mmap memory for rte_config\n");
+
+ rte_config.mem_config = mem_config;
+}
+
+/* Detect if we are a primary or a secondary process */
+enum rte_proc_type_t
+eal_proc_type_detect(void)
+{
+ enum rte_proc_type_t ptype = RTE_PROC_PRIMARY;
+ const char *pathname = eal_runtime_config_path();
+
+ /* if we can open the file but not get a write-lock we are a secondary
+ * process. NOTE: if we get a file handle back, we keep that open
+ * and don't close it to prevent a race condition between multiple opens */
+ if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) &&
+ (fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0))
+ ptype = RTE_PROC_SECONDARY;
+
+ RTE_LOG(INFO, EAL, "Auto-detected process type: %s\n",
+ ptype == RTE_PROC_PRIMARY ? "PRIMARY" : "SECONDARY");
+
+ return ptype;
+}
+
+/* Sets up rte_config structure with the pointer to shared memory config.*/
+static void
+rte_config_init(void)
+{
+ rte_config.process_type = internal_config.process_type;
+
+ switch (rte_config.process_type){
+ case RTE_PROC_PRIMARY:
+ rte_eal_config_create();
+ break;
+ case RTE_PROC_SECONDARY:
+ rte_eal_config_attach();
+ rte_eal_mcfg_wait_complete(rte_config.mem_config);
+ rte_eal_config_reattach();
+ break;
+ case RTE_PROC_AUTO:
+ case RTE_PROC_INVALID:
+ rte_panic("Invalid process type\n");
+ }
+}
+
+/* Unlocks hugepage directories that were locked by eal_hugepage_info_init */
+static void
+eal_hugedirs_unlock(void)
+{
+ int i;
+
+ for (i = 0; i < MAX_HUGEPAGE_SIZES; i++)
+ {
+ /* skip uninitialized */
+ if (internal_config.hugepage_info[i].lock_descriptor < 0)
+ continue;
+ /* unlock hugepage file */
+ flock(internal_config.hugepage_info[i].lock_descriptor, LOCK_UN);
+ close(internal_config.hugepage_info[i].lock_descriptor);
+ /* reset the field */
+ internal_config.hugepage_info[i].lock_descriptor = -1;
+ }
+}
+
+/* display usage */
+static void
+eal_usage(const char *prgname)
+{
+ printf("\nUsage: %s ", prgname);
+ eal_common_usage();
+ printf("EAL Linux options:\n"
+ " --"OPT_SOCKET_MEM" Memory to allocate on sockets (comma separated values)\n"
+ " --"OPT_HUGE_DIR" Directory where hugetlbfs is mounted\n"
+ " --"OPT_FILE_PREFIX" Prefix for hugepage filenames\n"
+ " --"OPT_BASE_VIRTADDR" Base virtual address\n"
+ " --"OPT_CREATE_UIO_DEV" Create /dev/uioX (usually done by hotplug)\n"
+ " --"OPT_VFIO_INTR" Interrupt mode for VFIO (legacy|msi|msix)\n"
+ " --"OPT_XEN_DOM0" Support running on Xen dom0 without hugetlbfs\n"
+ "\n");
+ /* Allow the application to print its usage message too if hook is set */
+ if ( rte_application_usage_hook ) {
+ printf("===== Application Usage =====\n\n");
+ rte_application_usage_hook(prgname);
+ }
+}
+
+/* Set a per-application usage message */
+rte_usage_hook_t
+rte_set_application_usage_hook( rte_usage_hook_t usage_func )
+{
+ rte_usage_hook_t old_func;
+
+ /* Will be NULL on the first call to denote the last usage routine. */
+ old_func = rte_application_usage_hook;
+ rte_application_usage_hook = usage_func;
+
+ return old_func;
+}
+
+static int
+eal_parse_socket_mem(char *socket_mem)
+{
+ char * arg[RTE_MAX_NUMA_NODES];
+ char *end;
+ int arg_num, i, len;
+ uint64_t total_mem = 0;
+
+ len = strnlen(socket_mem, SOCKET_MEM_STRLEN);
+ if (len == SOCKET_MEM_STRLEN) {
+ RTE_LOG(ERR, EAL, "--socket-mem is too long\n");
+ return -1;
+ }
+
+ /* all other error cases will be caught later */
+ if (!isdigit(socket_mem[len-1]))
+ return -1;
+
+ /* split the optarg into separate socket values */
+ arg_num = rte_strsplit(socket_mem, len,
+ arg, RTE_MAX_NUMA_NODES, ',');
+
+ /* if split failed, or 0 arguments */
+ if (arg_num <= 0)
+ return -1;
+
+ internal_config.force_sockets = 1;
+
+ /* parse each defined socket option */
+ errno = 0;
+ for (i = 0; i < arg_num; i++) {
+ end = NULL;
+ internal_config.socket_mem[i] = strtoull(arg[i], &end, 10);
+
+ /* check for invalid input */
+ if ((errno != 0) ||
+ (arg[i][0] == '\0') || (end == NULL) || (*end != '\0'))
+ return -1;
+ internal_config.socket_mem[i] *= 1024ULL;
+ internal_config.socket_mem[i] *= 1024ULL;
+ total_mem += internal_config.socket_mem[i];
+ }
+
+ /* check if we have a positive amount of total memory */
+ if (total_mem == 0)
+ return -1;
+
+ return 0;
+}
+
+static int
+eal_parse_base_virtaddr(const char *arg)
+{
+ char *end;
+ uint64_t addr;
+
+ errno = 0;
+ addr = strtoull(arg, &end, 16);
+
+ /* check for errors */
+ if ((errno != 0) || (arg[0] == '\0') || end == NULL || (*end != '\0'))
+ return -1;
+
+ /* make sure we don't exceed 32-bit boundary on 32-bit target */
+#ifndef RTE_ARCH_64
+ if (addr >= UINTPTR_MAX)
+ return -1;
+#endif
+
+ /* align the addr on 16M boundary, 16MB is the minimum huge page
+ * size on IBM Power architecture. If the addr is aligned to 16MB,
+ * it can align to 2MB for x86. So this alignment can also be used
+ * on x86 */
+ internal_config.base_virtaddr =
+ RTE_PTR_ALIGN_CEIL((uintptr_t)addr, (size_t)RTE_PGSIZE_16M);
+
+ return 0;
+}
+
+static int
+eal_parse_vfio_intr(const char *mode)
+{
+ unsigned i;
+ static struct {
+ const char *name;
+ enum rte_intr_mode value;
+ } map[] = {
+ { "legacy", RTE_INTR_MODE_LEGACY },
+ { "msi", RTE_INTR_MODE_MSI },
+ { "msix", RTE_INTR_MODE_MSIX },
+ };
+
+ for (i = 0; i < RTE_DIM(map); i++) {
+ if (!strcmp(mode, map[i].name)) {
+ internal_config.vfio_intr_mode = map[i].value;
+ return 0;
+ }
+ }
+ return -1;
+}
+
+static inline size_t
+eal_get_hugepage_mem_size(void)
+{
+ uint64_t size = 0;
+ unsigned i, j;
+
+ for (i = 0; i < internal_config.num_hugepage_sizes; i++) {
+ struct hugepage_info *hpi = &internal_config.hugepage_info[i];
+ if (hpi->hugedir != NULL) {
+ for (j = 0; j < RTE_MAX_NUMA_NODES; j++) {
+ size += hpi->hugepage_sz * hpi->num_pages[j];
+ }
+ }
+ }
+
+ return (size < SIZE_MAX) ? (size_t)(size) : SIZE_MAX;
+}
+
+/* Parse the arguments for --log-level only */
+static void
+eal_log_level_parse(int argc, char **argv)
+{
+ int opt;
+ char **argvopt;
+ int option_index;
+ const int old_optind = optind;
+ const int old_optopt = optopt;
+ char * const old_optarg = optarg;
+
+ argvopt = argv;
+ optind = 1;
+
+ eal_reset_internal_config(&internal_config);
+
+ while ((opt = getopt_long(argc, argvopt, eal_short_options,
+ eal_long_options, &option_index)) != EOF) {
+
+ int ret;
+
+ /* getopt is not happy, stop right now */
+ if (opt == '?')
+ break;
+
+ ret = (opt == OPT_LOG_LEVEL_NUM) ?
+ eal_parse_common_option(opt, optarg, &internal_config) : 0;
+
+ /* common parser is not happy */
+ if (ret < 0)
+ break;
+ }
+
+ /* restore getopt lib */
+ optind = old_optind;
+ optopt = old_optopt;
+ optarg = old_optarg;
+}
+
+/* Parse the argument given in the command line of the application */
+static int
+eal_parse_args(int argc, char **argv)
+{
+ int opt, ret;
+ char **argvopt;
+ int option_index;
+ char *prgname = argv[0];
+ const int old_optind = optind;
+ const int old_optopt = optopt;
+ char * const old_optarg = optarg;
+
+ argvopt = argv;
+ optind = 1;
+
+ while ((opt = getopt_long(argc, argvopt, eal_short_options,
+ eal_long_options, &option_index)) != EOF) {
+
+ /* getopt is not happy, stop right now */
+ if (opt == '?') {
+ eal_usage(prgname);
+ ret = -1;
+ goto out;
+ }
+
+ ret = eal_parse_common_option(opt, optarg, &internal_config);
+ /* common parser is not happy */
+ if (ret < 0) {
+ eal_usage(prgname);
+ ret = -1;
+ goto out;
+ }
+ /* common parser handled this option */
+ if (ret == 0)
+ continue;
+
+ switch (opt) {
+ case 'h':
+ eal_usage(prgname);
+ exit(EXIT_SUCCESS);
+
+ /* long options */
+ case OPT_XEN_DOM0_NUM:
+#ifdef RTE_LIBRTE_XEN_DOM0
+ internal_config.xen_dom0_support = 1;
+#else
+ RTE_LOG(ERR, EAL, "Can't support DPDK app "
+ "running on Dom0, please configure"
+ " RTE_LIBRTE_XEN_DOM0=y\n");
+ ret = -1;
+ goto out;
+#endif
+ break;
+
+ case OPT_HUGE_DIR_NUM:
+ internal_config.hugepage_dir = optarg;
+ break;
+
+ case OPT_FILE_PREFIX_NUM:
+ internal_config.hugefile_prefix = optarg;
+ break;
+
+ case OPT_SOCKET_MEM_NUM:
+ if (eal_parse_socket_mem(optarg) < 0) {
+ RTE_LOG(ERR, EAL, "invalid parameters for --"
+ OPT_SOCKET_MEM "\n");
+ eal_usage(prgname);
+ ret = -1;
+ goto out;
+ }
+ break;
+
+ case OPT_BASE_VIRTADDR_NUM:
+ if (eal_parse_base_virtaddr(optarg) < 0) {
+ RTE_LOG(ERR, EAL, "invalid parameter for --"
+ OPT_BASE_VIRTADDR "\n");
+ eal_usage(prgname);
+ ret = -1;
+ goto out;
+ }
+ break;
+
+ case OPT_VFIO_INTR_NUM:
+ if (eal_parse_vfio_intr(optarg) < 0) {
+ RTE_LOG(ERR, EAL, "invalid parameters for --"
+ OPT_VFIO_INTR "\n");
+ eal_usage(prgname);
+ ret = -1;
+ goto out;
+ }
+ break;
+
+ case OPT_CREATE_UIO_DEV_NUM:
+ internal_config.create_uio_dev = 1;
+ break;
+
+ default:
+ if (opt < OPT_LONG_MIN_NUM && isprint(opt)) {
+ RTE_LOG(ERR, EAL, "Option %c is not supported "
+ "on Linux\n", opt);
+ } else if (opt >= OPT_LONG_MIN_NUM &&
+ opt < OPT_LONG_MAX_NUM) {
+ RTE_LOG(ERR, EAL, "Option %s is not supported "
+ "on Linux\n",
+ eal_long_options[option_index].name);
+ } else {
+ RTE_LOG(ERR, EAL, "Option %d is not supported "
+ "on Linux\n", opt);
+ }
+ eal_usage(prgname);
+ ret = -1;
+ goto out;
+ }
+ }
+
+ if (eal_adjust_config(&internal_config) != 0) {
+ ret = -1;
+ goto out;
+ }
+
+ /* sanity checks */
+ if (eal_check_common_options(&internal_config) != 0) {
+ eal_usage(prgname);
+ ret = -1;
+ goto out;
+ }
+
+ /* --xen-dom0 doesn't make sense with --socket-mem */
+ if (internal_config.xen_dom0_support && internal_config.force_sockets == 1) {
+ RTE_LOG(ERR, EAL, "Options --"OPT_SOCKET_MEM" cannot be specified "
+ "together with --"OPT_XEN_DOM0"\n");
+ eal_usage(prgname);
+ ret = -1;
+ goto out;
+ }
+
+ if (optind >= 0)
+ argv[optind-1] = prgname;
+ ret = optind-1;
+
+out:
+ /* restore getopt lib */
+ optind = old_optind;
+ optopt = old_optopt;
+ optarg = old_optarg;
+
+ return ret;
+}
+
+static void
+eal_check_mem_on_local_socket(void)
+{
+ const struct rte_memseg *ms;
+ int i, socket_id;
+
+ socket_id = rte_lcore_to_socket_id(rte_config.master_lcore);
+
+ ms = rte_eal_get_physmem_layout();
+
+ for (i = 0; i < RTE_MAX_MEMSEG; i++)
+ if (ms[i].socket_id == socket_id &&
+ ms[i].len > 0)
+ return;
+
+ RTE_LOG(WARNING, EAL, "WARNING: Master core has no "
+ "memory on local socket!\n");
+}
+
+static int
+sync_func(__attribute__((unused)) void *arg)
+{
+ return 0;
+}
+
+inline static void
+rte_eal_mcfg_complete(void)
+{
+ /* ALL shared mem_config related INIT DONE */
+ if (rte_config.process_type == RTE_PROC_PRIMARY)
+ rte_config.mem_config->magic = RTE_MAGIC;
+}
+
+/*
+ * Request iopl privilege for all RPL, returns 0 on success
+ * iopl() call is mostly for the i386 architecture. For other architectures,
+ * return -1 to indicate IO privilege can't be changed in this way.
+ */
+int
+rte_eal_iopl_init(void)
+{
+#if defined(RTE_ARCH_X86)
+ if (iopl(3) != 0)
+ return -1;
+ return 0;
+#elif defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
+ return 0; /* iopl syscall not supported for ARM/ARM64 */
+#else
+ return -1;
+#endif
+}
+
+/* Launch threads, called at application init(). */
+int
+rte_eal_init(int argc, char **argv)
+{
+ int i, fctret, ret;
+ pthread_t thread_id;
+ static rte_atomic32_t run_once = RTE_ATOMIC32_INIT(0);
+ const char *logid;
+ char cpuset[RTE_CPU_AFFINITY_STR_LEN];
+ char thread_name[RTE_MAX_THREAD_NAME_LEN];
+
+ if (!rte_atomic32_test_and_set(&run_once))
+ return -1;
+
+ logid = strrchr(argv[0], '/');
+ logid = strdup(logid ? logid + 1: argv[0]);
+
+ thread_id = pthread_self();
+
+ if (rte_eal_log_early_init() < 0)
+ rte_panic("Cannot init early logs\n");
+
+ eal_log_level_parse(argc, argv);
+
+ /* set log level as early as possible */
+ rte_set_log_level(internal_config.log_level);
+
+ if (rte_eal_cpu_init() < 0)
+ rte_panic("Cannot detect lcores\n");
+
+ fctret = eal_parse_args(argc, argv);
+ if (fctret < 0)
+ exit(1);
+
+ if (internal_config.no_hugetlbfs == 0 &&
+ internal_config.process_type != RTE_PROC_SECONDARY &&
+ internal_config.xen_dom0_support == 0 &&
+ eal_hugepage_info_init() < 0)
+ rte_panic("Cannot get hugepage information\n");
+
+ if (internal_config.memory == 0 && internal_config.force_sockets == 0) {
+ if (internal_config.no_hugetlbfs)
+ internal_config.memory = MEMSIZE_IF_NO_HUGE_PAGE;
+ else
+ internal_config.memory = eal_get_hugepage_mem_size();
+ }
+
+ if (internal_config.vmware_tsc_map == 1) {
+#ifdef RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT
+ rte_cycles_vmware_tsc_map = 1;
+ RTE_LOG (DEBUG, EAL, "Using VMWARE TSC MAP, "
+ "you must have monitor_control.pseudo_perfctr = TRUE\n");
+#else
+ RTE_LOG (WARNING, EAL, "Ignoring --vmware-tsc-map because "
+ "RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT is not set\n");
+#endif
+ }
+
+ rte_srand(rte_rdtsc());
+
+ rte_config_init();
+
+ if (rte_eal_pci_init() < 0)
+ rte_panic("Cannot init PCI\n");
+
+#ifdef RTE_LIBRTE_IVSHMEM
+ if (rte_eal_ivshmem_init() < 0)
+ rte_panic("Cannot init IVSHMEM\n");
+#endif
+
+ if (rte_eal_memory_init() < 0)
+ rte_panic("Cannot init memory\n");
+
+ /* the directories are locked during eal_hugepage_info_init */
+ eal_hugedirs_unlock();
+
+ if (rte_eal_memzone_init() < 0)
+ rte_panic("Cannot init memzone\n");
+
+ if (rte_eal_tailqs_init() < 0)
+ rte_panic("Cannot init tail queues for objects\n");
+
+#ifdef RTE_LIBRTE_IVSHMEM
+ if (rte_eal_ivshmem_obj_init() < 0)
+ rte_panic("Cannot init IVSHMEM objects\n");
+#endif
+
+ if (rte_eal_log_init(logid, internal_config.syslog_facility) < 0)
+ rte_panic("Cannot init logs\n");
+
+ if (rte_eal_alarm_init() < 0)
+ rte_panic("Cannot init interrupt-handling thread\n");
+
+ if (rte_eal_timer_init() < 0)
+ rte_panic("Cannot init HPET or TSC timers\n");
+
+ eal_check_mem_on_local_socket();
+
+ if (eal_plugins_init() < 0)
+ rte_panic("Cannot init plugins\n");
+
+ eal_thread_init_master(rte_config.master_lcore);
+
+ ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN);
+
+ RTE_LOG(DEBUG, EAL, "Master lcore %u is ready (tid=%x;cpuset=[%s%s])\n",
+ rte_config.master_lcore, (int)thread_id, cpuset,
+ ret == 0 ? "" : "...");
+
+ if (rte_eal_dev_init() < 0)
+ rte_panic("Cannot init pmd devices\n");
+
+ if (rte_eal_intr_init() < 0)
+ rte_panic("Cannot init interrupt-handling thread\n");
+
+ RTE_LCORE_FOREACH_SLAVE(i) {
+
+ /*
+ * create communication pipes between master thread
+ * and children
+ */
+ if (pipe(lcore_config[i].pipe_master2slave) < 0)
+ rte_panic("Cannot create pipe\n");
+ if (pipe(lcore_config[i].pipe_slave2master) < 0)
+ rte_panic("Cannot create pipe\n");
+
+ lcore_config[i].state = WAIT;
+
+ /* create a thread for each lcore */
+ ret = pthread_create(&lcore_config[i].thread_id, NULL,
+ eal_thread_loop, NULL);
+ if (ret != 0)
+ rte_panic("Cannot create thread\n");
+
+ /* Set thread_name for aid in debugging. */
+ snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN,
+ "lcore-slave-%d", i);
+ ret = rte_thread_setname(lcore_config[i].thread_id,
+ thread_name);
+ if (ret != 0)
+ RTE_LOG(ERR, EAL,
+ "Cannot set name for lcore thread\n");
+ }
+
+ /*
+ * Launch a dummy function on all slave lcores, so that master lcore
+ * knows they are all ready when this function returns.
+ */
+ rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MASTER);
+ rte_eal_mp_wait_lcore();
+
+ /* Probe & Initialize PCI devices */
+ if (rte_eal_pci_probe())
+ rte_panic("Cannot probe PCI\n");
+
+ rte_eal_mcfg_complete();
+
+ return fctret;
+}
+
+/* get core role */
+enum rte_lcore_role_t
+rte_eal_lcore_role(unsigned lcore_id)
+{
+ return rte_config.lcore_role[lcore_id];
+}
+
+enum rte_proc_type_t
+rte_eal_process_type(void)
+{
+ return rte_config.process_type;
+}
+
+int rte_eal_has_hugepages(void)
+{
+ return ! internal_config.no_hugetlbfs;
+}
+
+int
+rte_eal_check_module(const char *module_name)
+{
+ char sysfs_mod_name[PATH_MAX];
+ struct stat st;
+ int n;
+
+ if (NULL == module_name)
+ return -1;
+
+ /* Check if there is sysfs mounted */
+ if (stat("/sys/module", &st) != 0) {
+ RTE_LOG(DEBUG, EAL, "sysfs is not mounted! error %i (%s)\n",
+ errno, strerror(errno));
+ return -1;
+ }
+
+ /* A module might be built-in, therefore try sysfs */
+ n = snprintf(sysfs_mod_name, PATH_MAX, "/sys/module/%s", module_name);
+ if (n < 0 || n > PATH_MAX) {
+ RTE_LOG(DEBUG, EAL, "Could not format module path\n");
+ return -1;
+ }
+
+ if (stat(sysfs_mod_name, &st) != 0) {
+ RTE_LOG(DEBUG, EAL, "Module %s not found! error %i (%s)\n",
+ sysfs_mod_name, errno, strerror(errno));
+ return 0;
+ }
+
+ /* Module has been found */
+ return 1;
+}
diff --git a/lib/librte_eal/linuxapp/eal/eal_alarm.c b/lib/librte_eal/linuxapp/eal/eal_alarm.c
new file mode 100644
index 00000000..8b042abc
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_alarm.c
@@ -0,0 +1,273 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <stdio.h>
+#include <stdint.h>
+#include <signal.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/queue.h>
+#include <sys/time.h>
+#include <sys/timerfd.h>
+
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_interrupts.h>
+#include <rte_alarm.h>
+#include <rte_common.h>
+#include <rte_per_lcore.h>
+#include <rte_eal.h>
+#include <rte_launch.h>
+#include <rte_lcore.h>
+#include <rte_errno.h>
+#include <rte_malloc.h>
+#include <rte_spinlock.h>
+#include <eal_private.h>
+
+#ifndef TFD_NONBLOCK
+#include <fcntl.h>
+#define TFD_NONBLOCK O_NONBLOCK
+#endif
+
+#define NS_PER_US 1000
+#define US_PER_MS 1000
+#define MS_PER_S 1000
+#define US_PER_S (US_PER_MS * MS_PER_S)
+
+#ifdef CLOCK_MONOTONIC_RAW /* Defined in glibc bits/time.h */
+#define CLOCK_TYPE_ID CLOCK_MONOTONIC_RAW
+#else
+#define CLOCK_TYPE_ID CLOCK_MONOTONIC
+#endif
+
+struct alarm_entry {
+ LIST_ENTRY(alarm_entry) next;
+ struct timeval time;
+ rte_eal_alarm_callback cb_fn;
+ void *cb_arg;
+ volatile uint8_t executing;
+ volatile pthread_t executing_id;
+};
+
+static LIST_HEAD(alarm_list, alarm_entry) alarm_list = LIST_HEAD_INITIALIZER();
+static rte_spinlock_t alarm_list_lk = RTE_SPINLOCK_INITIALIZER;
+
+static struct rte_intr_handle intr_handle = {.fd = -1 };
+static int handler_registered = 0;
+static void eal_alarm_callback(struct rte_intr_handle *hdl, void *arg);
+
+int
+rte_eal_alarm_init(void)
+{
+ intr_handle.type = RTE_INTR_HANDLE_ALARM;
+ /* create a timerfd file descriptor */
+ intr_handle.fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK);
+ if (intr_handle.fd == -1)
+ goto error;
+
+ return 0;
+
+error:
+ rte_errno = errno;
+ return -1;
+}
+
+static void
+eal_alarm_callback(struct rte_intr_handle *hdl __rte_unused,
+ void *arg __rte_unused)
+{
+ struct timespec now;
+ struct alarm_entry *ap;
+
+ rte_spinlock_lock(&alarm_list_lk);
+ while ((ap = LIST_FIRST(&alarm_list)) !=NULL &&
+ clock_gettime(CLOCK_TYPE_ID, &now) == 0 &&
+ (ap->time.tv_sec < now.tv_sec || (ap->time.tv_sec == now.tv_sec &&
+ (ap->time.tv_usec * NS_PER_US) <= now.tv_nsec))) {
+ ap->executing = 1;
+ ap->executing_id = pthread_self();
+ rte_spinlock_unlock(&alarm_list_lk);
+
+ ap->cb_fn(ap->cb_arg);
+
+ rte_spinlock_lock(&alarm_list_lk);
+
+ LIST_REMOVE(ap, next);
+ rte_free(ap);
+ }
+
+ if (!LIST_EMPTY(&alarm_list)) {
+ struct itimerspec atime = { .it_interval = { 0, 0 } };
+
+ ap = LIST_FIRST(&alarm_list);
+ atime.it_value.tv_sec = ap->time.tv_sec;
+ atime.it_value.tv_nsec = ap->time.tv_usec * NS_PER_US;
+ /* perform borrow for subtraction if necessary */
+ if (now.tv_nsec > (ap->time.tv_usec * NS_PER_US))
+ atime.it_value.tv_sec--, atime.it_value.tv_nsec += US_PER_S * NS_PER_US;
+
+ atime.it_value.tv_sec -= now.tv_sec;
+ atime.it_value.tv_nsec -= now.tv_nsec;
+ timerfd_settime(intr_handle.fd, 0, &atime, NULL);
+ }
+ rte_spinlock_unlock(&alarm_list_lk);
+}
+
+int
+rte_eal_alarm_set(uint64_t us, rte_eal_alarm_callback cb_fn, void *cb_arg)
+{
+ struct timespec now;
+ int ret = 0;
+ struct alarm_entry *ap, *new_alarm;
+
+ /* Check parameters, including that us won't cause a uint64_t overflow */
+ if (us < 1 || us > (UINT64_MAX - US_PER_S) || cb_fn == NULL)
+ return -EINVAL;
+
+ new_alarm = rte_zmalloc(NULL, sizeof(*new_alarm), 0);
+ if (new_alarm == NULL)
+ return -ENOMEM;
+
+ /* use current time to calculate absolute time of alarm */
+ clock_gettime(CLOCK_TYPE_ID, &now);
+
+ new_alarm->cb_fn = cb_fn;
+ new_alarm->cb_arg = cb_arg;
+ new_alarm->time.tv_usec = ((now.tv_nsec / NS_PER_US) + us) % US_PER_S;
+ new_alarm->time.tv_sec = now.tv_sec + (((now.tv_nsec / NS_PER_US) + us) / US_PER_S);
+
+ rte_spinlock_lock(&alarm_list_lk);
+ if (!handler_registered) {
+ ret |= rte_intr_callback_register(&intr_handle,
+ eal_alarm_callback, NULL);
+ handler_registered = (ret == 0) ? 1 : 0;
+ }
+
+ if (LIST_EMPTY(&alarm_list))
+ LIST_INSERT_HEAD(&alarm_list, new_alarm, next);
+ else {
+ LIST_FOREACH(ap, &alarm_list, next) {
+ if (ap->time.tv_sec > new_alarm->time.tv_sec ||
+ (ap->time.tv_sec == new_alarm->time.tv_sec &&
+ ap->time.tv_usec > new_alarm->time.tv_usec)){
+ LIST_INSERT_BEFORE(ap, new_alarm, next);
+ break;
+ }
+ if (LIST_NEXT(ap, next) == NULL) {
+ LIST_INSERT_AFTER(ap, new_alarm, next);
+ break;
+ }
+ }
+ }
+
+ if (LIST_FIRST(&alarm_list) == new_alarm) {
+ struct itimerspec alarm_time = {
+ .it_interval = {0, 0},
+ .it_value = {
+ .tv_sec = us / US_PER_S,
+ .tv_nsec = (us % US_PER_S) * NS_PER_US,
+ },
+ };
+ ret |= timerfd_settime(intr_handle.fd, 0, &alarm_time, NULL);
+ }
+ rte_spinlock_unlock(&alarm_list_lk);
+
+ return ret;
+}
+
+int
+rte_eal_alarm_cancel(rte_eal_alarm_callback cb_fn, void *cb_arg)
+{
+ struct alarm_entry *ap, *ap_prev;
+ int count = 0;
+ int err = 0;
+ int executing;
+
+ if (!cb_fn) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ do {
+ executing = 0;
+ rte_spinlock_lock(&alarm_list_lk);
+ /* remove any matches at the start of the list */
+ while ((ap = LIST_FIRST(&alarm_list)) != NULL &&
+ cb_fn == ap->cb_fn &&
+ (cb_arg == (void *)-1 || cb_arg == ap->cb_arg)) {
+
+ if (ap->executing == 0) {
+ LIST_REMOVE(ap, next);
+ rte_free(ap);
+ count++;
+ } else {
+ /* If calling from other context, mark that alarm is executing
+ * so loop can spin till it finish. Otherwise we are trying to
+ * cancel our self - mark it by EINPROGRESS */
+ if (pthread_equal(ap->executing_id, pthread_self()) == 0)
+ executing++;
+ else
+ err = EINPROGRESS;
+
+ break;
+ }
+ }
+ ap_prev = ap;
+
+ /* now go through list, removing entries not at start */
+ LIST_FOREACH(ap, &alarm_list, next) {
+ /* this won't be true first time through */
+ if (cb_fn == ap->cb_fn &&
+ (cb_arg == (void *)-1 || cb_arg == ap->cb_arg)) {
+
+ if (ap->executing == 0) {
+ LIST_REMOVE(ap, next);
+ rte_free(ap);
+ count++;
+ ap = ap_prev;
+ } else if (pthread_equal(ap->executing_id, pthread_self()) == 0)
+ executing++;
+ else
+ err = EINPROGRESS;
+ }
+ ap_prev = ap;
+ }
+ rte_spinlock_unlock(&alarm_list_lk);
+ } while (executing != 0);
+
+ if (count == 0 && err == 0)
+ rte_errno = ENOENT;
+ else if (err)
+ rte_errno = err;
+
+ return count;
+}
diff --git a/lib/librte_eal/linuxapp/eal/eal_debug.c b/lib/librte_eal/linuxapp/eal/eal_debug.c
new file mode 100644
index 00000000..907fbfa7
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_debug.c
@@ -0,0 +1,119 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <execinfo.h>
+#include <stdarg.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+
+#include <rte_log.h>
+#include <rte_debug.h>
+#include <rte_common.h>
+
+#define BACKTRACE_SIZE 256
+
+/* dump the stack of the calling core */
+void rte_dump_stack(void)
+{
+ void *func[BACKTRACE_SIZE];
+ char **symb = NULL;
+ int size;
+
+ size = backtrace(func, BACKTRACE_SIZE);
+ symb = backtrace_symbols(func, size);
+
+ if (symb == NULL)
+ return;
+
+ while (size > 0) {
+ rte_log(RTE_LOG_ERR, RTE_LOGTYPE_EAL,
+ "%d: [%s]\n", size, symb[size - 1]);
+ size --;
+ }
+
+ free(symb);
+}
+
+/* not implemented in this environment */
+void rte_dump_registers(void)
+{
+ return;
+}
+
+/* call abort(), it will generate a coredump if enabled */
+void __rte_panic(const char *funcname, const char *format, ...)
+{
+ va_list ap;
+
+ /* disable history */
+ rte_log_set_history(0);
+
+ rte_log(RTE_LOG_CRIT, RTE_LOGTYPE_EAL, "PANIC in %s():\n", funcname);
+ va_start(ap, format);
+ rte_vlog(RTE_LOG_CRIT, RTE_LOGTYPE_EAL, format, ap);
+ va_end(ap);
+ rte_dump_stack();
+ rte_dump_registers();
+ abort();
+}
+
+/*
+ * Like rte_panic this terminates the application. However, no traceback is
+ * provided and no core-dump is generated.
+ */
+void
+rte_exit(int exit_code, const char *format, ...)
+{
+ va_list ap;
+
+ /* disable history */
+ rte_log_set_history(0);
+
+ if (exit_code != 0)
+ RTE_LOG(CRIT, EAL, "Error - exiting with code: %d\n"
+ " Cause: ", exit_code);
+
+ va_start(ap, format);
+ rte_vlog(RTE_LOG_CRIT, RTE_LOGTYPE_EAL, format, ap);
+ va_end(ap);
+
+#ifndef RTE_EAL_ALWAYS_PANIC_ON_ERROR
+ exit(exit_code);
+#else
+ rte_dump_stack();
+ rte_dump_registers();
+ abort();
+#endif
+}
diff --git a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c
new file mode 100644
index 00000000..18858e2d
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c
@@ -0,0 +1,365 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+#include <sys/types.h>
+#include <sys/file.h>
+#include <dirent.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <fnmatch.h>
+#include <inttypes.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/queue.h>
+
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_eal.h>
+#include <rte_launch.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_debug.h>
+#include <rte_log.h>
+#include <rte_common.h>
+#include "rte_string_fns.h"
+#include "eal_internal_cfg.h"
+#include "eal_hugepages.h"
+#include "eal_filesystem.h"
+
+static const char sys_dir_path[] = "/sys/kernel/mm/hugepages";
+
+/* this function is only called from eal_hugepage_info_init which itself
+ * is only called from a primary process */
+static uint32_t
+get_num_hugepages(const char *subdir)
+{
+ char path[PATH_MAX];
+ long unsigned resv_pages, num_pages = 0;
+ const char *nr_hp_file = "free_hugepages";
+ const char *nr_rsvd_file = "resv_hugepages";
+
+ /* first, check how many reserved pages kernel reports */
+ snprintf(path, sizeof(path), "%s/%s/%s",
+ sys_dir_path, subdir, nr_rsvd_file);
+ if (eal_parse_sysfs_value(path, &resv_pages) < 0)
+ return 0;
+
+ snprintf(path, sizeof(path), "%s/%s/%s",
+ sys_dir_path, subdir, nr_hp_file);
+ if (eal_parse_sysfs_value(path, &num_pages) < 0)
+ return 0;
+
+ if (num_pages == 0)
+ RTE_LOG(WARNING, EAL, "No free hugepages reported in %s\n",
+ subdir);
+
+ /* adjust num_pages */
+ if (num_pages >= resv_pages)
+ num_pages -= resv_pages;
+ else if (resv_pages)
+ num_pages = 0;
+
+ /* we want to return a uint32_t and more than this looks suspicious
+ * anyway ... */
+ if (num_pages > UINT32_MAX)
+ num_pages = UINT32_MAX;
+
+ return num_pages;
+}
+
+static uint64_t
+get_default_hp_size(void)
+{
+ const char proc_meminfo[] = "/proc/meminfo";
+ const char str_hugepagesz[] = "Hugepagesize:";
+ unsigned hugepagesz_len = sizeof(str_hugepagesz) - 1;
+ char buffer[256];
+ unsigned long long size = 0;
+
+ FILE *fd = fopen(proc_meminfo, "r");
+ if (fd == NULL)
+ rte_panic("Cannot open %s\n", proc_meminfo);
+ while(fgets(buffer, sizeof(buffer), fd)){
+ if (strncmp(buffer, str_hugepagesz, hugepagesz_len) == 0){
+ size = rte_str_to_size(&buffer[hugepagesz_len]);
+ break;
+ }
+ }
+ fclose(fd);
+ if (size == 0)
+ rte_panic("Cannot get default hugepage size from %s\n", proc_meminfo);
+ return size;
+}
+
+static const char *
+get_hugepage_dir(uint64_t hugepage_sz)
+{
+ enum proc_mount_fieldnames {
+ DEVICE = 0,
+ MOUNTPT,
+ FSTYPE,
+ OPTIONS,
+ _FIELDNAME_MAX
+ };
+ static uint64_t default_size = 0;
+ const char proc_mounts[] = "/proc/mounts";
+ const char hugetlbfs_str[] = "hugetlbfs";
+ const size_t htlbfs_str_len = sizeof(hugetlbfs_str) - 1;
+ const char pagesize_opt[] = "pagesize=";
+ const size_t pagesize_opt_len = sizeof(pagesize_opt) - 1;
+ const char split_tok = ' ';
+ char *splitstr[_FIELDNAME_MAX];
+ char buf[BUFSIZ];
+ char *retval = NULL;
+
+ FILE *fd = fopen(proc_mounts, "r");
+ if (fd == NULL)
+ rte_panic("Cannot open %s\n", proc_mounts);
+
+ if (default_size == 0)
+ default_size = get_default_hp_size();
+
+ while (fgets(buf, sizeof(buf), fd)){
+ if (rte_strsplit(buf, sizeof(buf), splitstr, _FIELDNAME_MAX,
+ split_tok) != _FIELDNAME_MAX) {
+ RTE_LOG(ERR, EAL, "Error parsing %s\n", proc_mounts);
+ break; /* return NULL */
+ }
+
+ /* we have a specified --huge-dir option, only examine that dir */
+ if (internal_config.hugepage_dir != NULL &&
+ strcmp(splitstr[MOUNTPT], internal_config.hugepage_dir) != 0)
+ continue;
+
+ if (strncmp(splitstr[FSTYPE], hugetlbfs_str, htlbfs_str_len) == 0){
+ const char *pagesz_str = strstr(splitstr[OPTIONS], pagesize_opt);
+
+ /* if no explicit page size, the default page size is compared */
+ if (pagesz_str == NULL){
+ if (hugepage_sz == default_size){
+ retval = strdup(splitstr[MOUNTPT]);
+ break;
+ }
+ }
+ /* there is an explicit page size, so check it */
+ else {
+ uint64_t pagesz = rte_str_to_size(&pagesz_str[pagesize_opt_len]);
+ if (pagesz == hugepage_sz) {
+ retval = strdup(splitstr[MOUNTPT]);
+ break;
+ }
+ }
+ } /* end if strncmp hugetlbfs */
+ } /* end while fgets */
+
+ fclose(fd);
+ return retval;
+}
+
+/*
+ * Clear the hugepage directory of whatever hugepage files
+ * there are. Checks if the file is locked (i.e.
+ * if it's in use by another DPDK process).
+ */
+static int
+clear_hugedir(const char * hugedir)
+{
+ DIR *dir;
+ struct dirent *dirent;
+ int dir_fd, fd, lck_result;
+ const char filter[] = "*map_*"; /* matches hugepage files */
+
+ /* open directory */
+ dir = opendir(hugedir);
+ if (!dir) {
+ RTE_LOG(ERR, EAL, "Unable to open hugepage directory %s\n",
+ hugedir);
+ goto error;
+ }
+ dir_fd = dirfd(dir);
+
+ dirent = readdir(dir);
+ if (!dirent) {
+ RTE_LOG(ERR, EAL, "Unable to read hugepage directory %s\n",
+ hugedir);
+ goto error;
+ }
+
+ while(dirent != NULL){
+ /* skip files that don't match the hugepage pattern */
+ if (fnmatch(filter, dirent->d_name, 0) > 0) {
+ dirent = readdir(dir);
+ continue;
+ }
+
+ /* try and lock the file */
+ fd = openat(dir_fd, dirent->d_name, O_RDONLY);
+
+ /* skip to next file */
+ if (fd == -1) {
+ dirent = readdir(dir);
+ continue;
+ }
+
+ /* non-blocking lock */
+ lck_result = flock(fd, LOCK_EX | LOCK_NB);
+
+ /* if lock succeeds, unlock and remove the file */
+ if (lck_result != -1) {
+ flock(fd, LOCK_UN);
+ unlinkat(dir_fd, dirent->d_name, 0);
+ }
+ close (fd);
+ dirent = readdir(dir);
+ }
+
+ closedir(dir);
+ return 0;
+
+error:
+ if (dir)
+ closedir(dir);
+
+ RTE_LOG(ERR, EAL, "Error while clearing hugepage dir: %s\n",
+ strerror(errno));
+
+ return -1;
+}
+
+static int
+compare_hpi(const void *a, const void *b)
+{
+ const struct hugepage_info *hpi_a = a;
+ const struct hugepage_info *hpi_b = b;
+
+ return hpi_b->hugepage_sz - hpi_a->hugepage_sz;
+}
+
+/*
+ * when we initialize the hugepage info, everything goes
+ * to socket 0 by default. it will later get sorted by memory
+ * initialization procedure.
+ */
+int
+eal_hugepage_info_init(void)
+{
+ const char dirent_start_text[] = "hugepages-";
+ const size_t dirent_start_len = sizeof(dirent_start_text) - 1;
+ unsigned i, num_sizes = 0;
+ DIR *dir;
+ struct dirent *dirent;
+
+ dir = opendir(sys_dir_path);
+ if (dir == NULL)
+ rte_panic("Cannot open directory %s to read system hugepage "
+ "info\n", sys_dir_path);
+
+ for (dirent = readdir(dir); dirent != NULL; dirent = readdir(dir)) {
+ struct hugepage_info *hpi;
+
+ if (strncmp(dirent->d_name, dirent_start_text,
+ dirent_start_len) != 0)
+ continue;
+
+ if (num_sizes >= MAX_HUGEPAGE_SIZES)
+ break;
+
+ hpi = &internal_config.hugepage_info[num_sizes];
+ hpi->hugepage_sz =
+ rte_str_to_size(&dirent->d_name[dirent_start_len]);
+ hpi->hugedir = get_hugepage_dir(hpi->hugepage_sz);
+
+ /* first, check if we have a mountpoint */
+ if (hpi->hugedir == NULL) {
+ uint32_t num_pages;
+
+ num_pages = get_num_hugepages(dirent->d_name);
+ if (num_pages > 0)
+ RTE_LOG(NOTICE, EAL,
+ "%" PRIu32 " hugepages of size "
+ "%" PRIu64 " reserved, but no mounted "
+ "hugetlbfs found for that size\n",
+ num_pages, hpi->hugepage_sz);
+ continue;
+ }
+
+ /* try to obtain a writelock */
+ hpi->lock_descriptor = open(hpi->hugedir, O_RDONLY);
+
+ /* if blocking lock failed */
+ if (flock(hpi->lock_descriptor, LOCK_EX) == -1) {
+ RTE_LOG(CRIT, EAL,
+ "Failed to lock hugepage directory!\n");
+ break;
+ }
+ /* clear out the hugepages dir from unused pages */
+ if (clear_hugedir(hpi->hugedir) == -1)
+ break;
+
+ /* for now, put all pages into socket 0,
+ * later they will be sorted */
+ hpi->num_pages[0] = get_num_hugepages(dirent->d_name);
+
+#ifndef RTE_ARCH_64
+ /* for 32-bit systems, limit number of hugepages to
+ * 1GB per page size */
+ hpi->num_pages[0] = RTE_MIN(hpi->num_pages[0],
+ RTE_PGSIZE_1G / hpi->hugepage_sz);
+#endif
+
+ num_sizes++;
+ }
+ closedir(dir);
+
+ /* something went wrong, and we broke from the for loop above */
+ if (dirent != NULL)
+ return -1;
+
+ internal_config.num_hugepage_sizes = num_sizes;
+
+ /* sort the page directory entries by size, largest to smallest */
+ qsort(&internal_config.hugepage_info[0], num_sizes,
+ sizeof(internal_config.hugepage_info[0]), compare_hpi);
+
+ /* now we have all info, check we have at least one valid size */
+ for (i = 0; i < num_sizes; i++)
+ if (internal_config.hugepage_info[i].hugedir != NULL &&
+ internal_config.hugepage_info[i].num_pages[0] > 0)
+ return 0;
+
+ /* no valid hugepage mounts available, return error */
+ return -1;
+}
diff --git a/lib/librte_eal/linuxapp/eal/eal_interrupts.c b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
new file mode 100644
index 00000000..06b26a9e
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
@@ -0,0 +1,1224 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <sys/queue.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <sys/epoll.h>
+#include <sys/signalfd.h>
+#include <sys/ioctl.h>
+#include <sys/eventfd.h>
+#include <assert.h>
+
+#include <rte_common.h>
+#include <rte_interrupts.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_launch.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_atomic.h>
+#include <rte_branch_prediction.h>
+#include <rte_ring.h>
+#include <rte_debug.h>
+#include <rte_log.h>
+#include <rte_mempool.h>
+#include <rte_pci.h>
+#include <rte_malloc.h>
+#include <rte_errno.h>
+#include <rte_spinlock.h>
+
+#include "eal_private.h"
+#include "eal_vfio.h"
+#include "eal_thread.h"
+
+#define EAL_INTR_EPOLL_WAIT_FOREVER (-1)
+#define NB_OTHER_INTR 1
+
+static RTE_DEFINE_PER_LCORE(int, _epfd) = -1; /**< epoll fd per thread */
+
+/**
+ * union for pipe fds.
+ */
+union intr_pipefds{
+ struct {
+ int pipefd[2];
+ };
+ struct {
+ int readfd;
+ int writefd;
+ };
+};
+
+/**
+ * union buffer for reading on different devices
+ */
+union rte_intr_read_buffer {
+ int uio_intr_count; /* for uio device */
+#ifdef VFIO_PRESENT
+ uint64_t vfio_intr_count; /* for vfio device */
+#endif
+ uint64_t timerfd_num; /* for timerfd */
+ char charbuf[16]; /* for others */
+};
+
+TAILQ_HEAD(rte_intr_cb_list, rte_intr_callback);
+TAILQ_HEAD(rte_intr_source_list, rte_intr_source);
+
+struct rte_intr_callback {
+ TAILQ_ENTRY(rte_intr_callback) next;
+ rte_intr_callback_fn cb_fn; /**< callback address */
+ void *cb_arg; /**< parameter for callback */
+};
+
+struct rte_intr_source {
+ TAILQ_ENTRY(rte_intr_source) next;
+ struct rte_intr_handle intr_handle; /**< interrupt handle */
+ struct rte_intr_cb_list callbacks; /**< user callbacks */
+ uint32_t active;
+};
+
+/* global spinlock for interrupt data operation */
+static rte_spinlock_t intr_lock = RTE_SPINLOCK_INITIALIZER;
+
+/* union buffer for pipe read/write */
+static union intr_pipefds intr_pipe;
+
+/* interrupt sources list */
+static struct rte_intr_source_list intr_sources;
+
+/* interrupt handling thread */
+static pthread_t intr_thread;
+
+/* VFIO interrupts */
+#ifdef VFIO_PRESENT
+
+#define IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + sizeof(int))
+/* irq set buffer length for queue interrupts and LSC interrupt */
+#define MSIX_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + \
+ sizeof(int) * (RTE_MAX_RXTX_INTR_VEC_ID + 1))
+
+/* enable legacy (INTx) interrupts */
+static int
+vfio_enable_intx(struct rte_intr_handle *intr_handle) {
+ struct vfio_irq_set *irq_set;
+ char irq_set_buf[IRQ_SET_BUF_LEN];
+ int len, ret;
+ int *fd_ptr;
+
+ len = sizeof(irq_set_buf);
+
+ /* enable INTx */
+ irq_set = (struct vfio_irq_set *) irq_set_buf;
+ irq_set->argsz = len;
+ irq_set->count = 1;
+ irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
+ irq_set->index = VFIO_PCI_INTX_IRQ_INDEX;
+ irq_set->start = 0;
+ fd_ptr = (int *) &irq_set->data;
+ *fd_ptr = intr_handle->fd;
+
+ ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
+
+ if (ret) {
+ RTE_LOG(ERR, EAL, "Error enabling INTx interrupts for fd %d\n",
+ intr_handle->fd);
+ return -1;
+ }
+
+ /* unmask INTx after enabling */
+ memset(irq_set, 0, len);
+ len = sizeof(struct vfio_irq_set);
+ irq_set->argsz = len;
+ irq_set->count = 1;
+ irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK;
+ irq_set->index = VFIO_PCI_INTX_IRQ_INDEX;
+ irq_set->start = 0;
+
+ ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
+
+ if (ret) {
+ RTE_LOG(ERR, EAL, "Error unmasking INTx interrupts for fd %d\n",
+ intr_handle->fd);
+ return -1;
+ }
+ return 0;
+}
+
+/* disable legacy (INTx) interrupts */
+static int
+vfio_disable_intx(struct rte_intr_handle *intr_handle) {
+ struct vfio_irq_set *irq_set;
+ char irq_set_buf[IRQ_SET_BUF_LEN];
+ int len, ret;
+
+ len = sizeof(struct vfio_irq_set);
+
+ /* mask interrupts before disabling */
+ irq_set = (struct vfio_irq_set *) irq_set_buf;
+ irq_set->argsz = len;
+ irq_set->count = 1;
+ irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK;
+ irq_set->index = VFIO_PCI_INTX_IRQ_INDEX;
+ irq_set->start = 0;
+
+ ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
+
+ if (ret) {
+ RTE_LOG(ERR, EAL, "Error unmasking INTx interrupts for fd %d\n",
+ intr_handle->fd);
+ return -1;
+ }
+
+ /* disable INTx*/
+ memset(irq_set, 0, len);
+ irq_set->argsz = len;
+ irq_set->count = 0;
+ irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
+ irq_set->index = VFIO_PCI_INTX_IRQ_INDEX;
+ irq_set->start = 0;
+
+ ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
+
+ if (ret) {
+ RTE_LOG(ERR, EAL,
+ "Error disabling INTx interrupts for fd %d\n", intr_handle->fd);
+ return -1;
+ }
+ return 0;
+}
+
+/* enable MSI interrupts */
+static int
+vfio_enable_msi(struct rte_intr_handle *intr_handle) {
+ int len, ret;
+ char irq_set_buf[IRQ_SET_BUF_LEN];
+ struct vfio_irq_set *irq_set;
+ int *fd_ptr;
+
+ len = sizeof(irq_set_buf);
+
+ irq_set = (struct vfio_irq_set *) irq_set_buf;
+ irq_set->argsz = len;
+ irq_set->count = 1;
+ irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
+ irq_set->index = VFIO_PCI_MSI_IRQ_INDEX;
+ irq_set->start = 0;
+ fd_ptr = (int *) &irq_set->data;
+ *fd_ptr = intr_handle->fd;
+
+ ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
+
+ if (ret) {
+ RTE_LOG(ERR, EAL, "Error enabling MSI interrupts for fd %d\n",
+ intr_handle->fd);
+ return -1;
+ }
+ return 0;
+}
+
+/* disable MSI interrupts */
+static int
+vfio_disable_msi(struct rte_intr_handle *intr_handle) {
+ struct vfio_irq_set *irq_set;
+ char irq_set_buf[IRQ_SET_BUF_LEN];
+ int len, ret;
+
+ len = sizeof(struct vfio_irq_set);
+
+ irq_set = (struct vfio_irq_set *) irq_set_buf;
+ irq_set->argsz = len;
+ irq_set->count = 0;
+ irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
+ irq_set->index = VFIO_PCI_MSI_IRQ_INDEX;
+ irq_set->start = 0;
+
+ ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
+
+ if (ret)
+ RTE_LOG(ERR, EAL,
+ "Error disabling MSI interrupts for fd %d\n", intr_handle->fd);
+
+ return ret;
+}
+
+/* enable MSI-X interrupts */
+static int
+vfio_enable_msix(struct rte_intr_handle *intr_handle) {
+ int len, ret;
+ char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
+ struct vfio_irq_set *irq_set;
+ int *fd_ptr;
+
+ len = sizeof(irq_set_buf);
+
+ irq_set = (struct vfio_irq_set *) irq_set_buf;
+ irq_set->argsz = len;
+ if (!intr_handle->max_intr)
+ intr_handle->max_intr = 1;
+ else if (intr_handle->max_intr > RTE_MAX_RXTX_INTR_VEC_ID)
+ intr_handle->max_intr = RTE_MAX_RXTX_INTR_VEC_ID + 1;
+
+ irq_set->count = intr_handle->max_intr;
+ irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
+ irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
+ irq_set->start = 0;
+ fd_ptr = (int *) &irq_set->data;
+ /* INTR vector offset 0 reserve for non-efds mapping */
+ fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] = intr_handle->fd;
+ memcpy(&fd_ptr[RTE_INTR_VEC_RXTX_OFFSET], intr_handle->efds,
+ sizeof(*intr_handle->efds) * intr_handle->nb_efd);
+
+ ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
+
+ if (ret) {
+ RTE_LOG(ERR, EAL, "Error enabling MSI-X interrupts for fd %d\n",
+ intr_handle->fd);
+ return -1;
+ }
+
+ return 0;
+}
+
+/* disable MSI-X interrupts */
+static int
+vfio_disable_msix(struct rte_intr_handle *intr_handle) {
+ struct vfio_irq_set *irq_set;
+ char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
+ int len, ret;
+
+ len = sizeof(struct vfio_irq_set);
+
+ irq_set = (struct vfio_irq_set *) irq_set_buf;
+ irq_set->argsz = len;
+ irq_set->count = 0;
+ irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
+ irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
+ irq_set->start = 0;
+
+ ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
+
+ if (ret)
+ RTE_LOG(ERR, EAL,
+ "Error disabling MSI-X interrupts for fd %d\n", intr_handle->fd);
+
+ return ret;
+}
+#endif
+
+static int
+uio_intx_intr_disable(struct rte_intr_handle *intr_handle)
+{
+ unsigned char command_high;
+
+ /* use UIO config file descriptor for uio_pci_generic */
+ if (pread(intr_handle->uio_cfg_fd, &command_high, 1, 5) != 1) {
+ RTE_LOG(ERR, EAL,
+ "Error reading interrupts status for fd %d\n",
+ intr_handle->uio_cfg_fd);
+ return -1;
+ }
+ /* disable interrupts */
+ command_high |= 0x4;
+ if (pwrite(intr_handle->uio_cfg_fd, &command_high, 1, 5) != 1) {
+ RTE_LOG(ERR, EAL,
+ "Error disabling interrupts for fd %d\n",
+ intr_handle->uio_cfg_fd);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int
+uio_intx_intr_enable(struct rte_intr_handle *intr_handle)
+{
+ unsigned char command_high;
+
+ /* use UIO config file descriptor for uio_pci_generic */
+ if (pread(intr_handle->uio_cfg_fd, &command_high, 1, 5) != 1) {
+ RTE_LOG(ERR, EAL,
+ "Error reading interrupts status for fd %d\n",
+ intr_handle->uio_cfg_fd);
+ return -1;
+ }
+ /* enable interrupts */
+ command_high &= ~0x4;
+ if (pwrite(intr_handle->uio_cfg_fd, &command_high, 1, 5) != 1) {
+ RTE_LOG(ERR, EAL,
+ "Error enabling interrupts for fd %d\n",
+ intr_handle->uio_cfg_fd);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int
+uio_intr_disable(struct rte_intr_handle *intr_handle)
+{
+ const int value = 0;
+
+ if (write(intr_handle->fd, &value, sizeof(value)) < 0) {
+ RTE_LOG(ERR, EAL,
+ "Error disabling interrupts for fd %d (%s)\n",
+ intr_handle->fd, strerror(errno));
+ return -1;
+ }
+ return 0;
+}
+
+static int
+uio_intr_enable(struct rte_intr_handle *intr_handle)
+{
+ const int value = 1;
+
+ if (write(intr_handle->fd, &value, sizeof(value)) < 0) {
+ RTE_LOG(ERR, EAL,
+ "Error enabling interrupts for fd %d (%s)\n",
+ intr_handle->fd, strerror(errno));
+ return -1;
+ }
+ return 0;
+}
+
+int
+rte_intr_callback_register(struct rte_intr_handle *intr_handle,
+ rte_intr_callback_fn cb, void *cb_arg)
+{
+ int ret, wake_thread;
+ struct rte_intr_source *src;
+ struct rte_intr_callback *callback;
+
+ wake_thread = 0;
+
+ /* first do parameter checking */
+ if (intr_handle == NULL || intr_handle->fd < 0 || cb == NULL) {
+ RTE_LOG(ERR, EAL,
+ "Registering with invalid input parameter\n");
+ return -EINVAL;
+ }
+
+ /* allocate a new interrupt callback entity */
+ callback = rte_zmalloc("interrupt callback list",
+ sizeof(*callback), 0);
+ if (callback == NULL) {
+ RTE_LOG(ERR, EAL, "Can not allocate memory\n");
+ return -ENOMEM;
+ }
+ callback->cb_fn = cb;
+ callback->cb_arg = cb_arg;
+
+ rte_spinlock_lock(&intr_lock);
+
+ /* check if there is at least one callback registered for the fd */
+ TAILQ_FOREACH(src, &intr_sources, next) {
+ if (src->intr_handle.fd == intr_handle->fd) {
+ /* we had no interrupts for this */
+ if TAILQ_EMPTY(&src->callbacks)
+ wake_thread = 1;
+
+ TAILQ_INSERT_TAIL(&(src->callbacks), callback, next);
+ ret = 0;
+ break;
+ }
+ }
+
+ /* no existing callbacks for this - add new source */
+ if (src == NULL) {
+ if ((src = rte_zmalloc("interrupt source list",
+ sizeof(*src), 0)) == NULL) {
+ RTE_LOG(ERR, EAL, "Can not allocate memory\n");
+ rte_free(callback);
+ ret = -ENOMEM;
+ } else {
+ src->intr_handle = *intr_handle;
+ TAILQ_INIT(&src->callbacks);
+ TAILQ_INSERT_TAIL(&(src->callbacks), callback, next);
+ TAILQ_INSERT_TAIL(&intr_sources, src, next);
+ wake_thread = 1;
+ ret = 0;
+ }
+ }
+
+ rte_spinlock_unlock(&intr_lock);
+
+ /**
+ * check if need to notify the pipe fd waited by epoll_wait to
+ * rebuild the wait list.
+ */
+ if (wake_thread)
+ if (write(intr_pipe.writefd, "1", 1) < 0)
+ return -EPIPE;
+
+ return ret;
+}
+
+int
+rte_intr_callback_unregister(struct rte_intr_handle *intr_handle,
+ rte_intr_callback_fn cb_fn, void *cb_arg)
+{
+ int ret;
+ struct rte_intr_source *src;
+ struct rte_intr_callback *cb, *next;
+
+ /* do parameter checking first */
+ if (intr_handle == NULL || intr_handle->fd < 0) {
+ RTE_LOG(ERR, EAL,
+ "Unregistering with invalid input parameter\n");
+ return -EINVAL;
+ }
+
+ rte_spinlock_lock(&intr_lock);
+
+ /* check if the insterrupt source for the fd is existent */
+ TAILQ_FOREACH(src, &intr_sources, next)
+ if (src->intr_handle.fd == intr_handle->fd)
+ break;
+
+ /* No interrupt source registered for the fd */
+ if (src == NULL) {
+ ret = -ENOENT;
+
+ /* interrupt source has some active callbacks right now. */
+ } else if (src->active != 0) {
+ ret = -EAGAIN;
+
+ /* ok to remove. */
+ } else {
+ ret = 0;
+
+ /*walk through the callbacks and remove all that match. */
+ for (cb = TAILQ_FIRST(&src->callbacks); cb != NULL; cb = next) {
+
+ next = TAILQ_NEXT(cb, next);
+
+ if (cb->cb_fn == cb_fn && (cb_arg == (void *)-1 ||
+ cb->cb_arg == cb_arg)) {
+ TAILQ_REMOVE(&src->callbacks, cb, next);
+ rte_free(cb);
+ ret++;
+ }
+ }
+
+ /* all callbacks for that source are removed. */
+ if (TAILQ_EMPTY(&src->callbacks)) {
+ TAILQ_REMOVE(&intr_sources, src, next);
+ rte_free(src);
+ }
+ }
+
+ rte_spinlock_unlock(&intr_lock);
+
+ /* notify the pipe fd waited by epoll_wait to rebuild the wait list */
+ if (ret >= 0 && write(intr_pipe.writefd, "1", 1) < 0) {
+ ret = -EPIPE;
+ }
+
+ return ret;
+}
+
+int
+rte_intr_enable(struct rte_intr_handle *intr_handle)
+{
+ if (!intr_handle || intr_handle->fd < 0 || intr_handle->uio_cfg_fd < 0)
+ return -1;
+
+ switch (intr_handle->type){
+ /* write to the uio fd to enable the interrupt */
+ case RTE_INTR_HANDLE_UIO:
+ if (uio_intr_enable(intr_handle))
+ return -1;
+ break;
+ case RTE_INTR_HANDLE_UIO_INTX:
+ if (uio_intx_intr_enable(intr_handle))
+ return -1;
+ break;
+ /* not used at this moment */
+ case RTE_INTR_HANDLE_ALARM:
+ return -1;
+#ifdef VFIO_PRESENT
+ case RTE_INTR_HANDLE_VFIO_MSIX:
+ if (vfio_enable_msix(intr_handle))
+ return -1;
+ break;
+ case RTE_INTR_HANDLE_VFIO_MSI:
+ if (vfio_enable_msi(intr_handle))
+ return -1;
+ break;
+ case RTE_INTR_HANDLE_VFIO_LEGACY:
+ if (vfio_enable_intx(intr_handle))
+ return -1;
+ break;
+#endif
+ /* unknown handle type */
+ default:
+ RTE_LOG(ERR, EAL,
+ "Unknown handle type of fd %d\n",
+ intr_handle->fd);
+ return -1;
+ }
+
+ return 0;
+}
+
+int
+rte_intr_disable(struct rte_intr_handle *intr_handle)
+{
+ if (!intr_handle || intr_handle->fd < 0 || intr_handle->uio_cfg_fd < 0)
+ return -1;
+
+ switch (intr_handle->type){
+ /* write to the uio fd to disable the interrupt */
+ case RTE_INTR_HANDLE_UIO:
+ if (uio_intr_disable(intr_handle))
+ return -1;
+ break;
+ case RTE_INTR_HANDLE_UIO_INTX:
+ if (uio_intx_intr_disable(intr_handle))
+ return -1;
+ break;
+ /* not used at this moment */
+ case RTE_INTR_HANDLE_ALARM:
+ return -1;
+#ifdef VFIO_PRESENT
+ case RTE_INTR_HANDLE_VFIO_MSIX:
+ if (vfio_disable_msix(intr_handle))
+ return -1;
+ break;
+ case RTE_INTR_HANDLE_VFIO_MSI:
+ if (vfio_disable_msi(intr_handle))
+ return -1;
+ break;
+ case RTE_INTR_HANDLE_VFIO_LEGACY:
+ if (vfio_disable_intx(intr_handle))
+ return -1;
+ break;
+#endif
+ /* unknown handle type */
+ default:
+ RTE_LOG(ERR, EAL,
+ "Unknown handle type of fd %d\n",
+ intr_handle->fd);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int
+eal_intr_process_interrupts(struct epoll_event *events, int nfds)
+{
+ int n, bytes_read;
+ struct rte_intr_source *src;
+ struct rte_intr_callback *cb;
+ union rte_intr_read_buffer buf;
+ struct rte_intr_callback active_cb;
+
+ for (n = 0; n < nfds; n++) {
+
+ /**
+ * if the pipe fd is ready to read, return out to
+ * rebuild the wait list.
+ */
+ if (events[n].data.fd == intr_pipe.readfd){
+ int r = read(intr_pipe.readfd, buf.charbuf,
+ sizeof(buf.charbuf));
+ RTE_SET_USED(r);
+ return -1;
+ }
+ rte_spinlock_lock(&intr_lock);
+ TAILQ_FOREACH(src, &intr_sources, next)
+ if (src->intr_handle.fd ==
+ events[n].data.fd)
+ break;
+ if (src == NULL){
+ rte_spinlock_unlock(&intr_lock);
+ continue;
+ }
+
+ /* mark this interrupt source as active and release the lock. */
+ src->active = 1;
+ rte_spinlock_unlock(&intr_lock);
+
+ /* set the length to be read dor different handle type */
+ switch (src->intr_handle.type) {
+ case RTE_INTR_HANDLE_UIO:
+ case RTE_INTR_HANDLE_UIO_INTX:
+ bytes_read = sizeof(buf.uio_intr_count);
+ break;
+ case RTE_INTR_HANDLE_ALARM:
+ bytes_read = sizeof(buf.timerfd_num);
+ break;
+#ifdef VFIO_PRESENT
+ case RTE_INTR_HANDLE_VFIO_MSIX:
+ case RTE_INTR_HANDLE_VFIO_MSI:
+ case RTE_INTR_HANDLE_VFIO_LEGACY:
+ bytes_read = sizeof(buf.vfio_intr_count);
+ break;
+#endif
+ case RTE_INTR_HANDLE_EXT:
+ default:
+ bytes_read = 1;
+ break;
+ }
+
+ if (src->intr_handle.type != RTE_INTR_HANDLE_EXT) {
+ /**
+ * read out to clear the ready-to-be-read flag
+ * for epoll_wait.
+ */
+ bytes_read = read(events[n].data.fd, &buf, bytes_read);
+ if (bytes_read < 0) {
+ if (errno == EINTR || errno == EWOULDBLOCK)
+ continue;
+
+ RTE_LOG(ERR, EAL, "Error reading from file "
+ "descriptor %d: %s\n",
+ events[n].data.fd,
+ strerror(errno));
+ } else if (bytes_read == 0)
+ RTE_LOG(ERR, EAL, "Read nothing from file "
+ "descriptor %d\n", events[n].data.fd);
+ }
+
+ /* grab a lock, again to call callbacks and update status. */
+ rte_spinlock_lock(&intr_lock);
+
+ if (bytes_read > 0) {
+
+ /* Finally, call all callbacks. */
+ TAILQ_FOREACH(cb, &src->callbacks, next) {
+
+ /* make a copy and unlock. */
+ active_cb = *cb;
+ rte_spinlock_unlock(&intr_lock);
+
+ /* call the actual callback */
+ active_cb.cb_fn(&src->intr_handle,
+ active_cb.cb_arg);
+
+ /*get the lock back. */
+ rte_spinlock_lock(&intr_lock);
+ }
+ }
+
+ /* we done with that interrupt source, release it. */
+ src->active = 0;
+ rte_spinlock_unlock(&intr_lock);
+ }
+
+ return 0;
+}
+
+/**
+ * It handles all the interrupts.
+ *
+ * @param pfd
+ * epoll file descriptor.
+ * @param totalfds
+ * The number of file descriptors added in epoll.
+ *
+ * @return
+ * void
+ */
+static void
+eal_intr_handle_interrupts(int pfd, unsigned totalfds)
+{
+ struct epoll_event events[totalfds];
+ int nfds = 0;
+
+ for(;;) {
+ nfds = epoll_wait(pfd, events, totalfds,
+ EAL_INTR_EPOLL_WAIT_FOREVER);
+ /* epoll_wait fail */
+ if (nfds < 0) {
+ if (errno == EINTR)
+ continue;
+ RTE_LOG(ERR, EAL,
+ "epoll_wait returns with fail\n");
+ return;
+ }
+ /* epoll_wait timeout, will never happens here */
+ else if (nfds == 0)
+ continue;
+ /* epoll_wait has at least one fd ready to read */
+ if (eal_intr_process_interrupts(events, nfds) < 0)
+ return;
+ }
+}
+
+/**
+ * It builds/rebuilds up the epoll file descriptor with all the
+ * file descriptors being waited on. Then handles the interrupts.
+ *
+ * @param arg
+ * pointer. (unused)
+ *
+ * @return
+ * never return;
+ */
+static __attribute__((noreturn)) void *
+eal_intr_thread_main(__rte_unused void *arg)
+{
+ struct epoll_event ev;
+
+ /* host thread, never break out */
+ for (;;) {
+ /* build up the epoll fd with all descriptors we are to
+ * wait on then pass it to the handle_interrupts function
+ */
+ static struct epoll_event pipe_event = {
+ .events = EPOLLIN | EPOLLPRI,
+ };
+ struct rte_intr_source *src;
+ unsigned numfds = 0;
+
+ /* create epoll fd */
+ int pfd = epoll_create(1);
+ if (pfd < 0)
+ rte_panic("Cannot create epoll instance\n");
+
+ pipe_event.data.fd = intr_pipe.readfd;
+ /**
+ * add pipe fd into wait list, this pipe is used to
+ * rebuild the wait list.
+ */
+ if (epoll_ctl(pfd, EPOLL_CTL_ADD, intr_pipe.readfd,
+ &pipe_event) < 0) {
+ rte_panic("Error adding fd to %d epoll_ctl, %s\n",
+ intr_pipe.readfd, strerror(errno));
+ }
+ numfds++;
+
+ rte_spinlock_lock(&intr_lock);
+
+ TAILQ_FOREACH(src, &intr_sources, next) {
+ if (src->callbacks.tqh_first == NULL)
+ continue; /* skip those with no callbacks */
+ ev.events = EPOLLIN | EPOLLPRI;
+ ev.data.fd = src->intr_handle.fd;
+
+ /**
+ * add all the uio device file descriptor
+ * into wait list.
+ */
+ if (epoll_ctl(pfd, EPOLL_CTL_ADD,
+ src->intr_handle.fd, &ev) < 0){
+ rte_panic("Error adding fd %d epoll_ctl, %s\n",
+ src->intr_handle.fd, strerror(errno));
+ }
+ else
+ numfds++;
+ }
+ rte_spinlock_unlock(&intr_lock);
+ /* serve the interrupt */
+ eal_intr_handle_interrupts(pfd, numfds);
+
+ /**
+ * when we return, we need to rebuild the
+ * list of fds to monitor.
+ */
+ close(pfd);
+ }
+}
+
+int
+rte_eal_intr_init(void)
+{
+ int ret = 0, ret_1 = 0;
+ char thread_name[RTE_MAX_THREAD_NAME_LEN];
+
+ /* init the global interrupt source head */
+ TAILQ_INIT(&intr_sources);
+
+ /**
+ * create a pipe which will be waited by epoll and notified to
+ * rebuild the wait list of epoll.
+ */
+ if (pipe(intr_pipe.pipefd) < 0)
+ return -1;
+
+ /* create the host thread to wait/handle the interrupt */
+ ret = pthread_create(&intr_thread, NULL,
+ eal_intr_thread_main, NULL);
+ if (ret != 0) {
+ RTE_LOG(ERR, EAL,
+ "Failed to create thread for interrupt handling\n");
+ } else {
+ /* Set thread_name for aid in debugging. */
+ snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN,
+ "eal-intr-thread");
+ ret_1 = rte_thread_setname(intr_thread, thread_name);
+ if (ret_1 != 0)
+ RTE_LOG(ERR, EAL,
+ "Failed to set thread name for interrupt handling\n");
+ }
+
+ return -ret;
+}
+
+static void
+eal_intr_proc_rxtx_intr(int fd, const struct rte_intr_handle *intr_handle)
+{
+ union rte_intr_read_buffer buf;
+ int bytes_read = 1;
+ int nbytes;
+
+ switch (intr_handle->type) {
+ case RTE_INTR_HANDLE_UIO:
+ case RTE_INTR_HANDLE_UIO_INTX:
+ bytes_read = sizeof(buf.uio_intr_count);
+ break;
+#ifdef VFIO_PRESENT
+ case RTE_INTR_HANDLE_VFIO_MSIX:
+ case RTE_INTR_HANDLE_VFIO_MSI:
+ case RTE_INTR_HANDLE_VFIO_LEGACY:
+ bytes_read = sizeof(buf.vfio_intr_count);
+ break;
+#endif
+ default:
+ bytes_read = 1;
+ RTE_LOG(INFO, EAL, "unexpected intr type\n");
+ break;
+ }
+
+ /**
+ * read out to clear the ready-to-be-read flag
+ * for epoll_wait.
+ */
+ do {
+ nbytes = read(fd, &buf, bytes_read);
+ if (nbytes < 0) {
+ if (errno == EINTR || errno == EWOULDBLOCK ||
+ errno == EAGAIN)
+ continue;
+ RTE_LOG(ERR, EAL,
+ "Error reading from fd %d: %s\n",
+ fd, strerror(errno));
+ } else if (nbytes == 0)
+ RTE_LOG(ERR, EAL, "Read nothing from fd %d\n", fd);
+ return;
+ } while (1);
+}
+
+static int
+eal_epoll_process_event(struct epoll_event *evs, unsigned int n,
+ struct rte_epoll_event *events)
+{
+ unsigned int i, count = 0;
+ struct rte_epoll_event *rev;
+
+ for (i = 0; i < n; i++) {
+ rev = evs[i].data.ptr;
+ if (!rev || !rte_atomic32_cmpset(&rev->status, RTE_EPOLL_VALID,
+ RTE_EPOLL_EXEC))
+ continue;
+
+ events[count].status = RTE_EPOLL_VALID;
+ events[count].fd = rev->fd;
+ events[count].epfd = rev->epfd;
+ events[count].epdata.event = rev->epdata.event;
+ events[count].epdata.data = rev->epdata.data;
+ if (rev->epdata.cb_fun)
+ rev->epdata.cb_fun(rev->fd,
+ rev->epdata.cb_arg);
+
+ rte_compiler_barrier();
+ rev->status = RTE_EPOLL_VALID;
+ count++;
+ }
+ return count;
+}
+
+static inline int
+eal_init_tls_epfd(void)
+{
+ int pfd = epoll_create(255);
+
+ if (pfd < 0) {
+ RTE_LOG(ERR, EAL,
+ "Cannot create epoll instance\n");
+ return -1;
+ }
+ return pfd;
+}
+
+int
+rte_intr_tls_epfd(void)
+{
+ if (RTE_PER_LCORE(_epfd) == -1)
+ RTE_PER_LCORE(_epfd) = eal_init_tls_epfd();
+
+ return RTE_PER_LCORE(_epfd);
+}
+
+int
+rte_epoll_wait(int epfd, struct rte_epoll_event *events,
+ int maxevents, int timeout)
+{
+ struct epoll_event evs[maxevents];
+ int rc;
+
+ if (!events) {
+ RTE_LOG(ERR, EAL, "rte_epoll_event can't be NULL\n");
+ return -1;
+ }
+
+ /* using per thread epoll fd */
+ if (epfd == RTE_EPOLL_PER_THREAD)
+ epfd = rte_intr_tls_epfd();
+
+ while (1) {
+ rc = epoll_wait(epfd, evs, maxevents, timeout);
+ if (likely(rc > 0)) {
+ /* epoll_wait has at least one fd ready to read */
+ rc = eal_epoll_process_event(evs, rc, events);
+ break;
+ } else if (rc < 0) {
+ if (errno == EINTR)
+ continue;
+ /* epoll_wait fail */
+ RTE_LOG(ERR, EAL, "epoll_wait returns with fail %s\n",
+ strerror(errno));
+ rc = -1;
+ break;
+ } else {
+ /* rc == 0, epoll_wait timed out */
+ break;
+ }
+ }
+
+ return rc;
+}
+
+static inline void
+eal_epoll_data_safe_free(struct rte_epoll_event *ev)
+{
+ while (!rte_atomic32_cmpset(&ev->status, RTE_EPOLL_VALID,
+ RTE_EPOLL_INVALID))
+ while (ev->status != RTE_EPOLL_VALID)
+ rte_pause();
+ memset(&ev->epdata, 0, sizeof(ev->epdata));
+ ev->fd = -1;
+ ev->epfd = -1;
+}
+
+int
+rte_epoll_ctl(int epfd, int op, int fd,
+ struct rte_epoll_event *event)
+{
+ struct epoll_event ev;
+
+ if (!event) {
+ RTE_LOG(ERR, EAL, "rte_epoll_event can't be NULL\n");
+ return -1;
+ }
+
+ /* using per thread epoll fd */
+ if (epfd == RTE_EPOLL_PER_THREAD)
+ epfd = rte_intr_tls_epfd();
+
+ if (op == EPOLL_CTL_ADD) {
+ event->status = RTE_EPOLL_VALID;
+ event->fd = fd; /* ignore fd in event */
+ event->epfd = epfd;
+ ev.data.ptr = (void *)event;
+ }
+
+ ev.events = event->epdata.event;
+ if (epoll_ctl(epfd, op, fd, &ev) < 0) {
+ RTE_LOG(ERR, EAL, "Error op %d fd %d epoll_ctl, %s\n",
+ op, fd, strerror(errno));
+ if (op == EPOLL_CTL_ADD)
+ /* rollback status when CTL_ADD fail */
+ event->status = RTE_EPOLL_INVALID;
+ return -1;
+ }
+
+ if (op == EPOLL_CTL_DEL && event->status != RTE_EPOLL_INVALID)
+ eal_epoll_data_safe_free(event);
+
+ return 0;
+}
+
+int
+rte_intr_rx_ctl(struct rte_intr_handle *intr_handle, int epfd,
+ int op, unsigned int vec, void *data)
+{
+ struct rte_epoll_event *rev;
+ struct rte_epoll_data *epdata;
+ int epfd_op;
+ unsigned int efd_idx;
+ int rc = 0;
+
+ efd_idx = (vec >= RTE_INTR_VEC_RXTX_OFFSET) ?
+ (vec - RTE_INTR_VEC_RXTX_OFFSET) : vec;
+
+ if (!intr_handle || intr_handle->nb_efd == 0 ||
+ efd_idx >= intr_handle->nb_efd) {
+ RTE_LOG(ERR, EAL, "Wrong intr vector number.\n");
+ return -EPERM;
+ }
+
+ switch (op) {
+ case RTE_INTR_EVENT_ADD:
+ epfd_op = EPOLL_CTL_ADD;
+ rev = &intr_handle->elist[efd_idx];
+ if (rev->status != RTE_EPOLL_INVALID) {
+ RTE_LOG(INFO, EAL, "Event already been added.\n");
+ return -EEXIST;
+ }
+
+ /* attach to intr vector fd */
+ epdata = &rev->epdata;
+ epdata->event = EPOLLIN | EPOLLPRI | EPOLLET;
+ epdata->data = data;
+ epdata->cb_fun = (rte_intr_event_cb_t)eal_intr_proc_rxtx_intr;
+ epdata->cb_arg = (void *)intr_handle;
+ rc = rte_epoll_ctl(epfd, epfd_op,
+ intr_handle->efds[efd_idx], rev);
+ if (!rc)
+ RTE_LOG(DEBUG, EAL,
+ "efd %d associated with vec %d added on epfd %d"
+ "\n", rev->fd, vec, epfd);
+ else
+ rc = -EPERM;
+ break;
+ case RTE_INTR_EVENT_DEL:
+ epfd_op = EPOLL_CTL_DEL;
+ rev = &intr_handle->elist[efd_idx];
+ if (rev->status == RTE_EPOLL_INVALID) {
+ RTE_LOG(INFO, EAL, "Event does not exist.\n");
+ return -EPERM;
+ }
+
+ rc = rte_epoll_ctl(rev->epfd, epfd_op, rev->fd, rev);
+ if (rc)
+ rc = -EPERM;
+ break;
+ default:
+ RTE_LOG(ERR, EAL, "event op type mismatch\n");
+ rc = -EPERM;
+ }
+
+ return rc;
+}
+
+int
+rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd)
+{
+ uint32_t i;
+ int fd;
+ uint32_t n = RTE_MIN(nb_efd, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
+
+ assert(nb_efd != 0);
+
+ if (intr_handle->type == RTE_INTR_HANDLE_VFIO_MSIX) {
+ for (i = 0; i < n; i++) {
+ fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+ if (fd < 0) {
+ RTE_LOG(ERR, EAL,
+ "can't setup eventfd, error %i (%s)\n",
+ errno, strerror(errno));
+ return -1;
+ }
+ intr_handle->efds[i] = fd;
+ }
+ intr_handle->nb_efd = n;
+ intr_handle->max_intr = NB_OTHER_INTR + n;
+ } else {
+ intr_handle->efds[0] = intr_handle->fd;
+ intr_handle->nb_efd = RTE_MIN(nb_efd, 1U);
+ intr_handle->max_intr = NB_OTHER_INTR;
+ }
+
+ return 0;
+}
+
+void
+rte_intr_efd_disable(struct rte_intr_handle *intr_handle)
+{
+ uint32_t i;
+ struct rte_epoll_event *rev;
+
+ for (i = 0; i < intr_handle->nb_efd; i++) {
+ rev = &intr_handle->elist[i];
+ if (rev->status == RTE_EPOLL_INVALID)
+ continue;
+ if (rte_epoll_ctl(rev->epfd, EPOLL_CTL_DEL, rev->fd, rev)) {
+ /* force free if the entry valid */
+ eal_epoll_data_safe_free(rev);
+ rev->status = RTE_EPOLL_INVALID;
+ }
+ }
+
+ if (intr_handle->max_intr > intr_handle->nb_efd) {
+ for (i = 0; i < intr_handle->nb_efd; i++)
+ close(intr_handle->efds[i]);
+ }
+ intr_handle->nb_efd = 0;
+ intr_handle->max_intr = 0;
+}
+
+int
+rte_intr_dp_is_en(struct rte_intr_handle *intr_handle)
+{
+ return !(!intr_handle->nb_efd);
+}
+
+int
+rte_intr_allow_others(struct rte_intr_handle *intr_handle)
+{
+ if (!rte_intr_dp_is_en(intr_handle))
+ return 1;
+ else
+ return !!(intr_handle->max_intr - intr_handle->nb_efd);
+}
+
+int
+rte_intr_cap_multiple(struct rte_intr_handle *intr_handle)
+{
+ if (intr_handle->type == RTE_INTR_HANDLE_VFIO_MSIX)
+ return 1;
+
+ return 0;
+}
diff --git a/lib/librte_eal/linuxapp/eal/eal_ivshmem.c b/lib/librte_eal/linuxapp/eal/eal_ivshmem.c
new file mode 100644
index 00000000..07aec694
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_ivshmem.c
@@ -0,0 +1,955 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef RTE_LIBRTE_IVSHMEM /* hide it from coverage */
+
+#include <stdint.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <sys/mman.h>
+#include <sys/file.h>
+#include <string.h>
+#include <sys/queue.h>
+
+#include <rte_log.h>
+#include <rte_pci.h>
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_string_fns.h>
+#include <rte_errno.h>
+#include <rte_ring.h>
+#include <rte_mempool.h>
+#include <rte_malloc.h>
+#include <rte_common.h>
+#include <rte_ivshmem.h>
+
+#include "eal_internal_cfg.h"
+#include "eal_private.h"
+
+#define PCI_VENDOR_ID_IVSHMEM 0x1Af4
+#define PCI_DEVICE_ID_IVSHMEM 0x1110
+
+#define IVSHMEM_MAGIC 0x0BADC0DE
+
+#define IVSHMEM_RESOURCE_PATH "/sys/bus/pci/devices/%04x:%02x:%02x.%x/resource2"
+#define IVSHMEM_CONFIG_PATH "/var/run/.%s_ivshmem_config"
+
+#define PHYS 0x1
+#define VIRT 0x2
+#define IOREMAP 0x4
+#define FULL (PHYS|VIRT|IOREMAP)
+
+#define METADATA_SIZE_ALIGNED \
+ (RTE_ALIGN_CEIL(sizeof(struct rte_ivshmem_metadata),pagesz))
+
+#define CONTAINS(x,y)\
+ (((y).addr_64 >= (x).addr_64) && ((y).addr_64 < (x).addr_64 + (x).len))
+
+#define DIM(x) (sizeof(x)/sizeof(x[0]))
+
+struct ivshmem_pci_device {
+ char path[PATH_MAX];
+ phys_addr_t ioremap_addr;
+};
+
+/* data type to store in config */
+struct ivshmem_segment {
+ struct rte_ivshmem_metadata_entry entry;
+ uint64_t align;
+ char path[PATH_MAX];
+};
+struct ivshmem_shared_config {
+ struct ivshmem_segment segment[RTE_MAX_MEMSEG];
+ uint32_t segment_idx;
+ struct ivshmem_pci_device pci_devs[RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS];
+ uint32_t pci_devs_idx;
+};
+static struct ivshmem_shared_config * ivshmem_config;
+static int memseg_idx;
+static int pagesz;
+
+/* Tailq heads to add rings to */
+TAILQ_HEAD(rte_ring_list, rte_tailq_entry);
+
+/*
+ * Utility functions
+ */
+
+static int
+is_ivshmem_device(struct rte_pci_device * dev)
+{
+ return dev->id.vendor_id == PCI_VENDOR_ID_IVSHMEM
+ && dev->id.device_id == PCI_DEVICE_ID_IVSHMEM;
+}
+
+static void *
+map_metadata(int fd, uint64_t len)
+{
+ size_t metadata_len = sizeof(struct rte_ivshmem_metadata);
+ size_t aligned_len = METADATA_SIZE_ALIGNED;
+
+ return mmap(NULL, metadata_len, PROT_READ | PROT_WRITE,
+ MAP_SHARED, fd, len - aligned_len);
+}
+
+static void
+unmap_metadata(void * ptr)
+{
+ munmap(ptr, sizeof(struct rte_ivshmem_metadata));
+}
+
+static int
+has_ivshmem_metadata(int fd, uint64_t len)
+{
+ struct rte_ivshmem_metadata metadata;
+ void * ptr;
+
+ ptr = map_metadata(fd, len);
+
+ if (ptr == MAP_FAILED)
+ return -1;
+
+ metadata = *(struct rte_ivshmem_metadata*) (ptr);
+
+ unmap_metadata(ptr);
+
+ return metadata.magic_number == IVSHMEM_MAGIC;
+}
+
+static void
+remove_segment(struct ivshmem_segment * ms, int len, int idx)
+{
+ int i;
+
+ for (i = idx; i < len - 1; i++)
+ memcpy(&ms[i], &ms[i+1], sizeof(struct ivshmem_segment));
+ memset(&ms[len-1], 0, sizeof(struct ivshmem_segment));
+}
+
+static int
+overlap(const struct rte_memzone * mz1, const struct rte_memzone * mz2)
+{
+ uint64_t start1, end1, start2, end2;
+ uint64_t p_start1, p_end1, p_start2, p_end2;
+ uint64_t i_start1, i_end1, i_start2, i_end2;
+ int result = 0;
+
+ /* gather virtual addresses */
+ start1 = mz1->addr_64;
+ end1 = mz1->addr_64 + mz1->len;
+ start2 = mz2->addr_64;
+ end2 = mz2->addr_64 + mz2->len;
+
+ /* gather physical addresses */
+ p_start1 = mz1->phys_addr;
+ p_end1 = mz1->phys_addr + mz1->len;
+ p_start2 = mz2->phys_addr;
+ p_end2 = mz2->phys_addr + mz2->len;
+
+ /* gather ioremap addresses */
+ i_start1 = mz1->ioremap_addr;
+ i_end1 = mz1->ioremap_addr + mz1->len;
+ i_start2 = mz2->ioremap_addr;
+ i_end2 = mz2->ioremap_addr + mz2->len;
+
+ /* check for overlap in virtual addresses */
+ if (start1 > start2 && start1 < end2)
+ result |= VIRT;
+ if (start2 >= start1 && start2 < end1)
+ result |= VIRT;
+
+ /* check for overlap in physical addresses */
+ if (p_start1 > p_start2 && p_start1 < p_end2)
+ result |= PHYS;
+ if (p_start2 > p_start1 && p_start2 < p_end1)
+ result |= PHYS;
+
+ /* check for overlap in ioremap addresses */
+ if (i_start1 > i_start2 && i_start1 < i_end2)
+ result |= IOREMAP;
+ if (i_start2 > i_start1 && i_start2 < i_end1)
+ result |= IOREMAP;
+
+ return result;
+}
+
+static int
+adjacent(const struct rte_memzone * mz1, const struct rte_memzone * mz2)
+{
+ uint64_t start1, end1, start2, end2;
+ uint64_t p_start1, p_end1, p_start2, p_end2;
+ uint64_t i_start1, i_end1, i_start2, i_end2;
+ int result = 0;
+
+ /* gather virtual addresses */
+ start1 = mz1->addr_64;
+ end1 = mz1->addr_64 + mz1->len;
+ start2 = mz2->addr_64;
+ end2 = mz2->addr_64 + mz2->len;
+
+ /* gather physical addresses */
+ p_start1 = mz1->phys_addr;
+ p_end1 = mz1->phys_addr + mz1->len;
+ p_start2 = mz2->phys_addr;
+ p_end2 = mz2->phys_addr + mz2->len;
+
+ /* gather ioremap addresses */
+ i_start1 = mz1->ioremap_addr;
+ i_end1 = mz1->ioremap_addr + mz1->len;
+ i_start2 = mz2->ioremap_addr;
+ i_end2 = mz2->ioremap_addr + mz2->len;
+
+ /* check if segments are virtually adjacent */
+ if (start1 == end2)
+ result |= VIRT;
+ if (start2 == end1)
+ result |= VIRT;
+
+ /* check if segments are physically adjacent */
+ if (p_start1 == p_end2)
+ result |= PHYS;
+ if (p_start2 == p_end1)
+ result |= PHYS;
+
+ /* check if segments are ioremap-adjacent */
+ if (i_start1 == i_end2)
+ result |= IOREMAP;
+ if (i_start2 == i_end1)
+ result |= IOREMAP;
+
+ return result;
+}
+
+static int
+has_adjacent_segments(struct ivshmem_segment * ms, int len)
+{
+ int i, j;
+
+ for (i = 0; i < len; i++)
+ for (j = i + 1; j < len; j++) {
+ /* we're only interested in fully adjacent segments; partially
+ * adjacent segments can coexist.
+ */
+ if (adjacent(&ms[i].entry.mz, &ms[j].entry.mz) == FULL)
+ return 1;
+ }
+ return 0;
+}
+
+static int
+has_overlapping_segments(struct ivshmem_segment * ms, int len)
+{
+ int i, j;
+
+ for (i = 0; i < len; i++)
+ for (j = i + 1; j < len; j++)
+ if (overlap(&ms[i].entry.mz, &ms[j].entry.mz))
+ return 1;
+ return 0;
+}
+
+static int
+seg_compare(const void * a, const void * b)
+{
+ const struct ivshmem_segment * s1 = (const struct ivshmem_segment*) a;
+ const struct ivshmem_segment * s2 = (const struct ivshmem_segment*) b;
+
+ /* move unallocated zones to the end */
+ if (s1->entry.mz.addr == NULL && s2->entry.mz.addr == NULL)
+ return 0;
+ if (s1->entry.mz.addr == 0)
+ return 1;
+ if (s2->entry.mz.addr == 0)
+ return -1;
+
+ return s1->entry.mz.phys_addr > s2->entry.mz.phys_addr;
+}
+
+#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
+static void
+entry_dump(struct rte_ivshmem_metadata_entry *e)
+{
+ RTE_LOG(DEBUG, EAL, "\tvirt: %p-%p\n", e->mz.addr,
+ RTE_PTR_ADD(e->mz.addr, e->mz.len));
+ RTE_LOG(DEBUG, EAL, "\tphys: 0x%" PRIx64 "-0x%" PRIx64 "\n",
+ e->mz.phys_addr,
+ e->mz.phys_addr + e->mz.len);
+ RTE_LOG(DEBUG, EAL, "\tio: 0x%" PRIx64 "-0x%" PRIx64 "\n",
+ e->mz.ioremap_addr,
+ e->mz.ioremap_addr + e->mz.len);
+ RTE_LOG(DEBUG, EAL, "\tlen: 0x%" PRIx64 "\n", e->mz.len);
+ RTE_LOG(DEBUG, EAL, "\toff: 0x%" PRIx64 "\n", e->offset);
+}
+#endif
+
+
+
+/*
+ * Actual useful code
+ */
+
+/* read through metadata mapped from the IVSHMEM device */
+static int
+read_metadata(char * path, int path_len, int fd, uint64_t flen)
+{
+ struct rte_ivshmem_metadata metadata;
+ struct rte_ivshmem_metadata_entry * entry;
+ int idx, i;
+ void * ptr;
+
+ ptr = map_metadata(fd, flen);
+
+ if (ptr == MAP_FAILED)
+ return -1;
+
+ metadata = *(struct rte_ivshmem_metadata*) (ptr);
+
+ unmap_metadata(ptr);
+
+ RTE_LOG(DEBUG, EAL, "Parsing metadata for \"%s\"\n", metadata.name);
+
+ idx = ivshmem_config->segment_idx;
+
+ for (i = 0; i < RTE_LIBRTE_IVSHMEM_MAX_ENTRIES &&
+ idx <= RTE_MAX_MEMSEG; i++) {
+
+ if (idx == RTE_MAX_MEMSEG) {
+ RTE_LOG(ERR, EAL, "Not enough memory segments!\n");
+ return -1;
+ }
+
+ entry = &metadata.entry[i];
+
+ /* stop on uninitialized memzone */
+ if (entry->mz.len == 0)
+ break;
+
+ /* copy metadata entry */
+ memcpy(&ivshmem_config->segment[idx].entry, entry,
+ sizeof(struct rte_ivshmem_metadata_entry));
+
+ /* copy path */
+ snprintf(ivshmem_config->segment[idx].path, path_len, "%s", path);
+
+ idx++;
+ }
+ ivshmem_config->segment_idx = idx;
+
+ return 0;
+}
+
+/* check through each segment and look for adjacent or overlapping ones. */
+static int
+cleanup_segments(struct ivshmem_segment * ms, int tbl_len)
+{
+ struct ivshmem_segment * s, * tmp;
+ int i, j, concat, seg_adjacent, seg_overlapping;
+ uint64_t start1, start2, end1, end2, p_start1, p_start2, i_start1, i_start2;
+
+ qsort(ms, tbl_len, sizeof(struct ivshmem_segment),
+ seg_compare);
+
+ while (has_overlapping_segments(ms, tbl_len) ||
+ has_adjacent_segments(ms, tbl_len)) {
+
+ for (i = 0; i < tbl_len; i++) {
+ s = &ms[i];
+
+ concat = 0;
+
+ for (j = i + 1; j < tbl_len; j++) {
+ tmp = &ms[j];
+
+ /* check if this segment is overlapping with existing segment,
+ * or is adjacent to existing segment */
+ seg_overlapping = overlap(&s->entry.mz, &tmp->entry.mz);
+ seg_adjacent = adjacent(&s->entry.mz, &tmp->entry.mz);
+
+ /* check if segments fully overlap or are fully adjacent */
+ if ((seg_adjacent == FULL) || (seg_overlapping == FULL)) {
+
+#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
+ RTE_LOG(DEBUG, EAL, "Concatenating segments\n");
+ RTE_LOG(DEBUG, EAL, "Segment %i:\n", i);
+ entry_dump(&s->entry);
+ RTE_LOG(DEBUG, EAL, "Segment %i:\n", j);
+ entry_dump(&tmp->entry);
+#endif
+
+ start1 = s->entry.mz.addr_64;
+ start2 = tmp->entry.mz.addr_64;
+ p_start1 = s->entry.mz.phys_addr;
+ p_start2 = tmp->entry.mz.phys_addr;
+ i_start1 = s->entry.mz.ioremap_addr;
+ i_start2 = tmp->entry.mz.ioremap_addr;
+ end1 = s->entry.mz.addr_64 + s->entry.mz.len;
+ end2 = tmp->entry.mz.addr_64 + tmp->entry.mz.len;
+
+ /* settle for minimum start address and maximum length */
+ s->entry.mz.addr_64 = RTE_MIN(start1, start2);
+ s->entry.mz.phys_addr = RTE_MIN(p_start1, p_start2);
+ s->entry.mz.ioremap_addr = RTE_MIN(i_start1, i_start2);
+ s->entry.offset = RTE_MIN(s->entry.offset, tmp->entry.offset);
+ s->entry.mz.len = RTE_MAX(end1, end2) - s->entry.mz.addr_64;
+ concat = 1;
+
+#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
+ RTE_LOG(DEBUG, EAL, "Resulting segment:\n");
+ entry_dump(&s->entry);
+
+#endif
+ }
+ /* if segments not fully overlap, we have an error condition.
+ * adjacent segments can coexist.
+ */
+ else if (seg_overlapping > 0) {
+ RTE_LOG(ERR, EAL, "Segments %i and %i overlap!\n", i, j);
+#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
+ RTE_LOG(DEBUG, EAL, "Segment %i:\n", i);
+ entry_dump(&s->entry);
+ RTE_LOG(DEBUG, EAL, "Segment %i:\n", j);
+ entry_dump(&tmp->entry);
+#endif
+ return -1;
+ }
+ if (concat)
+ break;
+ }
+ /* if we concatenated, remove segment at j */
+ if (concat) {
+ remove_segment(ms, tbl_len, j);
+ tbl_len--;
+ break;
+ }
+ }
+ }
+
+ return tbl_len;
+}
+
+static int
+create_shared_config(void)
+{
+ char path[PATH_MAX];
+ int fd;
+
+ /* build ivshmem config file path */
+ snprintf(path, sizeof(path), IVSHMEM_CONFIG_PATH,
+ internal_config.hugefile_prefix);
+
+ fd = open(path, O_CREAT | O_RDWR, 0600);
+
+ if (fd < 0) {
+ RTE_LOG(ERR, EAL, "Could not open %s: %s\n", path, strerror(errno));
+ return -1;
+ }
+
+ /* try ex-locking first - if the file is locked, we have a problem */
+ if (flock(fd, LOCK_EX | LOCK_NB) == -1) {
+ RTE_LOG(ERR, EAL, "Locking %s failed: %s\n", path, strerror(errno));
+ close(fd);
+ return -1;
+ }
+
+ if (ftruncate(fd, sizeof(struct ivshmem_shared_config)) < 0) {
+ RTE_LOG(ERR, EAL, "ftruncate failed: %s\n", strerror(errno));
+ return -1;
+ }
+
+ ivshmem_config = mmap(NULL, sizeof(struct ivshmem_shared_config),
+ PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+
+ if (ivshmem_config == MAP_FAILED)
+ return -1;
+
+ memset(ivshmem_config, 0, sizeof(struct ivshmem_shared_config));
+
+ /* change the exclusive lock we got earlier to a shared lock */
+ if (flock(fd, LOCK_SH | LOCK_NB) == -1) {
+ RTE_LOG(ERR, EAL, "Locking %s failed: %s \n", path, strerror(errno));
+ return -1;
+ }
+
+ close(fd);
+
+ return 0;
+}
+
+/* open shared config file and, if present, map the config.
+ * having no config file is not an error condition, as we later check if
+ * ivshmem_config is NULL (if it is, that means nothing was mapped). */
+static int
+open_shared_config(void)
+{
+ char path[PATH_MAX];
+ int fd;
+
+ /* build ivshmem config file path */
+ snprintf(path, sizeof(path), IVSHMEM_CONFIG_PATH,
+ internal_config.hugefile_prefix);
+
+ fd = open(path, O_RDONLY);
+
+ /* if the file doesn't exist, just return success */
+ if (fd < 0 && errno == ENOENT)
+ return 0;
+ /* else we have an error condition */
+ else if (fd < 0) {
+ RTE_LOG(ERR, EAL, "Could not open %s: %s\n",
+ path, strerror(errno));
+ return -1;
+ }
+
+ /* try ex-locking first - if the lock *does* succeed, this means it's a
+ * stray config file, so it should be deleted.
+ */
+ if (flock(fd, LOCK_EX | LOCK_NB) != -1) {
+
+ /* if we can't remove the file, something is wrong */
+ if (unlink(path) < 0) {
+ RTE_LOG(ERR, EAL, "Could not remove %s: %s\n", path,
+ strerror(errno));
+ return -1;
+ }
+
+ /* release the lock */
+ flock(fd, LOCK_UN);
+ close(fd);
+
+ /* return success as having a stray config file is equivalent to not
+ * having config file at all.
+ */
+ return 0;
+ }
+
+ ivshmem_config = mmap(NULL, sizeof(struct ivshmem_shared_config),
+ PROT_READ, MAP_SHARED, fd, 0);
+
+ if (ivshmem_config == MAP_FAILED)
+ return -1;
+
+ /* place a shared lock on config file */
+ if (flock(fd, LOCK_SH | LOCK_NB) == -1) {
+ RTE_LOG(ERR, EAL, "Locking %s failed: %s \n", path, strerror(errno));
+ return -1;
+ }
+
+ close(fd);
+
+ return 0;
+}
+
+/*
+ * This function does the following:
+ *
+ * 1) Builds a table of ivshmem_segments with proper offset alignment
+ * 2) Cleans up that table so that we don't have any overlapping or adjacent
+ * memory segments
+ * 3) Creates memsegs from this table and maps them into memory.
+ */
+static inline int
+map_all_segments(void)
+{
+ struct ivshmem_segment ms_tbl[RTE_MAX_MEMSEG];
+ struct ivshmem_pci_device * pci_dev;
+ struct rte_mem_config * mcfg;
+ struct ivshmem_segment * seg;
+ int fd, fd_zero;
+ unsigned i, j;
+ struct rte_memzone mz;
+ struct rte_memseg ms;
+ void * base_addr;
+ uint64_t align, len;
+ phys_addr_t ioremap_addr;
+
+ ioremap_addr = 0;
+
+ memset(ms_tbl, 0, sizeof(ms_tbl));
+ memset(&mz, 0, sizeof(struct rte_memzone));
+ memset(&ms, 0, sizeof(struct rte_memseg));
+
+ /* first, build a table of memsegs to map, to avoid failed mmaps due to
+ * overlaps
+ */
+ for (i = 0; i < ivshmem_config->segment_idx && i <= RTE_MAX_MEMSEG; i++) {
+ if (i == RTE_MAX_MEMSEG) {
+ RTE_LOG(ERR, EAL, "Too many segments requested!\n");
+ return -1;
+ }
+
+ seg = &ivshmem_config->segment[i];
+
+ /* copy segment to table */
+ memcpy(&ms_tbl[i], seg, sizeof(struct ivshmem_segment));
+
+ /* find ioremap addr */
+ for (j = 0; j < DIM(ivshmem_config->pci_devs); j++) {
+ pci_dev = &ivshmem_config->pci_devs[j];
+ if (!strncmp(pci_dev->path, seg->path, sizeof(pci_dev->path))) {
+ ioremap_addr = pci_dev->ioremap_addr;
+ break;
+ }
+ }
+ if (ioremap_addr == 0) {
+ RTE_LOG(ERR, EAL, "Cannot find ioremap addr!\n");
+ return -1;
+ }
+
+ /* work out alignments */
+ align = seg->entry.mz.addr_64 -
+ RTE_ALIGN_FLOOR(seg->entry.mz.addr_64, 0x1000);
+ len = RTE_ALIGN_CEIL(seg->entry.mz.len + align, 0x1000);
+
+ /* save original alignments */
+ ms_tbl[i].align = align;
+
+ /* create a memory zone */
+ mz.addr_64 = seg->entry.mz.addr_64 - align;
+ mz.len = len;
+ mz.hugepage_sz = seg->entry.mz.hugepage_sz;
+ mz.phys_addr = seg->entry.mz.phys_addr - align;
+
+ /* find true physical address */
+ mz.ioremap_addr = ioremap_addr + seg->entry.offset - align;
+
+ ms_tbl[i].entry.offset = seg->entry.offset - align;
+
+ memcpy(&ms_tbl[i].entry.mz, &mz, sizeof(struct rte_memzone));
+ }
+
+ /* clean up the segments */
+ memseg_idx = cleanup_segments(ms_tbl, ivshmem_config->segment_idx);
+
+ if (memseg_idx < 0)
+ return -1;
+
+ mcfg = rte_eal_get_configuration()->mem_config;
+
+ fd_zero = open("/dev/zero", O_RDWR);
+
+ if (fd_zero < 0) {
+ RTE_LOG(ERR, EAL, "Cannot open /dev/zero: %s\n", strerror(errno));
+ return -1;
+ }
+
+ /* create memsegs and put them into DPDK memory */
+ for (i = 0; i < (unsigned) memseg_idx; i++) {
+
+ seg = &ms_tbl[i];
+
+ ms.addr_64 = seg->entry.mz.addr_64;
+ ms.hugepage_sz = seg->entry.mz.hugepage_sz;
+ ms.len = seg->entry.mz.len;
+ ms.nchannel = rte_memory_get_nchannel();
+ ms.nrank = rte_memory_get_nrank();
+ ms.phys_addr = seg->entry.mz.phys_addr;
+ ms.ioremap_addr = seg->entry.mz.ioremap_addr;
+ ms.socket_id = seg->entry.mz.socket_id;
+
+ base_addr = mmap(ms.addr, ms.len,
+ PROT_READ | PROT_WRITE, MAP_PRIVATE, fd_zero, 0);
+
+ if (base_addr == MAP_FAILED || base_addr != ms.addr) {
+ RTE_LOG(ERR, EAL, "Cannot map /dev/zero!\n");
+ return -1;
+ }
+
+ fd = open(seg->path, O_RDWR);
+
+ if (fd < 0) {
+ RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", seg->path,
+ strerror(errno));
+ return -1;
+ }
+
+ munmap(ms.addr, ms.len);
+
+ base_addr = mmap(ms.addr, ms.len,
+ PROT_READ | PROT_WRITE, MAP_SHARED, fd,
+ seg->entry.offset);
+
+
+ if (base_addr == MAP_FAILED || base_addr != ms.addr) {
+ RTE_LOG(ERR, EAL, "Cannot map segment into memory: "
+ "expected %p got %p (%s)\n", ms.addr, base_addr,
+ strerror(errno));
+ return -1;
+ }
+
+ RTE_LOG(DEBUG, EAL, "Memory segment mapped: %p (len %" PRIx64 ") at "
+ "offset 0x%" PRIx64 "\n",
+ ms.addr, ms.len, seg->entry.offset);
+
+ /* put the pointers back into their real positions using original
+ * alignment */
+ ms.addr_64 += seg->align;
+ ms.phys_addr += seg->align;
+ ms.ioremap_addr += seg->align;
+ ms.len -= seg->align;
+
+ /* at this point, the rest of DPDK memory is not initialized, so we
+ * expect memsegs to be empty */
+ memcpy(&mcfg->memseg[i], &ms,
+ sizeof(struct rte_memseg));
+
+ close(fd);
+
+ RTE_LOG(DEBUG, EAL, "IVSHMEM segment found, size: 0x%lx\n",
+ ms.len);
+ }
+
+ return 0;
+}
+
+/* this happens at a later stage, after general EAL memory initialization */
+int
+rte_eal_ivshmem_obj_init(void)
+{
+ struct rte_ring_list* ring_list = NULL;
+ struct rte_mem_config * mcfg;
+ struct ivshmem_segment * seg;
+ struct rte_memzone * mz;
+ struct rte_ring * r;
+ struct rte_tailq_entry *te;
+ unsigned i, ms, idx;
+ uint64_t offset;
+
+ /* secondary process would not need any object discovery - it'll all
+ * already be in shared config */
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY || ivshmem_config == NULL)
+ return 0;
+
+ /* check that we have an initialised ring tail queue */
+ ring_list = RTE_TAILQ_LOOKUP(RTE_TAILQ_RING_NAME, rte_ring_list);
+ if (ring_list == NULL) {
+ RTE_LOG(ERR, EAL, "No rte_ring tailq found!\n");
+ return -1;
+ }
+
+ mcfg = rte_eal_get_configuration()->mem_config;
+
+ /* create memzones */
+ for (i = 0; i < ivshmem_config->segment_idx && i <= RTE_MAX_MEMZONE; i++) {
+
+ seg = &ivshmem_config->segment[i];
+
+ /* add memzone */
+ if (mcfg->memzone_cnt == RTE_MAX_MEMZONE) {
+ RTE_LOG(ERR, EAL, "No more memory zones available!\n");
+ return -1;
+ }
+
+ idx = mcfg->memzone_cnt;
+
+ RTE_LOG(DEBUG, EAL, "Found memzone: '%s' at %p (len 0x%" PRIx64 ")\n",
+ seg->entry.mz.name, seg->entry.mz.addr, seg->entry.mz.len);
+
+ memcpy(&mcfg->memzone[idx], &seg->entry.mz,
+ sizeof(struct rte_memzone));
+
+ /* find ioremap address */
+ for (ms = 0; ms <= RTE_MAX_MEMSEG; ms++) {
+ if (ms == RTE_MAX_MEMSEG) {
+ RTE_LOG(ERR, EAL, "Physical address of segment not found!\n");
+ return -1;
+ }
+ if (CONTAINS(mcfg->memseg[ms], mcfg->memzone[idx])) {
+ offset = mcfg->memzone[idx].addr_64 -
+ mcfg->memseg[ms].addr_64;
+ mcfg->memzone[idx].ioremap_addr = mcfg->memseg[ms].ioremap_addr +
+ offset;
+ break;
+ }
+ }
+
+ mcfg->memzone_cnt++;
+ }
+
+ rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
+
+ /* find rings */
+ for (i = 0; i < mcfg->memzone_cnt; i++) {
+ mz = &mcfg->memzone[i];
+
+ /* check if memzone has a ring prefix */
+ if (strncmp(mz->name, RTE_RING_MZ_PREFIX,
+ sizeof(RTE_RING_MZ_PREFIX) - 1) != 0)
+ continue;
+
+ r = (struct rte_ring*) (mz->addr_64);
+
+ te = rte_zmalloc("RING_TAILQ_ENTRY", sizeof(*te), 0);
+ if (te == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot allocate ring tailq entry!\n");
+ return -1;
+ }
+
+ te->data = (void *) r;
+
+ TAILQ_INSERT_TAIL(ring_list, te, next);
+
+ RTE_LOG(DEBUG, EAL, "Found ring: '%s' at %p\n", r->name, mz->addr);
+ }
+ rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+
+#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
+ rte_memzone_dump(stdout);
+ rte_ring_list_dump(stdout);
+#endif
+
+ return 0;
+}
+
+/* initialize ivshmem structures */
+int rte_eal_ivshmem_init(void)
+{
+ struct rte_pci_device * dev;
+ struct rte_pci_resource * res;
+ int fd, ret;
+ char path[PATH_MAX];
+
+ /* initialize everything to 0 */
+ memset(path, 0, sizeof(path));
+ ivshmem_config = NULL;
+
+ pagesz = getpagesize();
+
+ RTE_LOG(DEBUG, EAL, "Searching for IVSHMEM devices...\n");
+
+ if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
+
+ if (open_shared_config() < 0) {
+ RTE_LOG(ERR, EAL, "Could not open IVSHMEM config!\n");
+ return -1;
+ }
+ }
+ else {
+
+ TAILQ_FOREACH(dev, &pci_device_list, next) {
+
+ if (is_ivshmem_device(dev)) {
+
+ /* IVSHMEM memory is always on BAR2 */
+ res = &dev->mem_resource[2];
+
+ /* if we don't have a BAR2 */
+ if (res->len == 0)
+ continue;
+
+ /* construct pci device path */
+ snprintf(path, sizeof(path), IVSHMEM_RESOURCE_PATH,
+ dev->addr.domain, dev->addr.bus, dev->addr.devid,
+ dev->addr.function);
+
+ /* try to find memseg */
+ fd = open(path, O_RDWR);
+ if (fd < 0) {
+ RTE_LOG(ERR, EAL, "Could not open %s\n", path);
+ return -1;
+ }
+
+ /* check if it's a DPDK IVSHMEM device */
+ ret = has_ivshmem_metadata(fd, res->len);
+
+ /* is DPDK device */
+ if (ret == 1) {
+
+ /* config file creation is deferred until the first
+ * DPDK device is found. then, it has to be created
+ * only once. */
+ if (ivshmem_config == NULL &&
+ create_shared_config() < 0) {
+ RTE_LOG(ERR, EAL, "Could not create IVSHMEM config!\n");
+ close(fd);
+ return -1;
+ }
+
+ if (read_metadata(path, sizeof(path), fd, res->len) < 0) {
+ RTE_LOG(ERR, EAL, "Could not read metadata from"
+ " device %02x:%02x.%x!\n", dev->addr.bus,
+ dev->addr.devid, dev->addr.function);
+ close(fd);
+ return -1;
+ }
+
+ if (ivshmem_config->pci_devs_idx == RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS) {
+ RTE_LOG(WARNING, EAL,
+ "IVSHMEM PCI device limit exceeded. Increase "
+ "CONFIG_RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS in "
+ "your config file.\n");
+ break;
+ }
+
+ RTE_LOG(INFO, EAL, "Found IVSHMEM device %02x:%02x.%x\n",
+ dev->addr.bus, dev->addr.devid, dev->addr.function);
+
+ ivshmem_config->pci_devs[ivshmem_config->pci_devs_idx].ioremap_addr = res->phys_addr;
+ snprintf(ivshmem_config->pci_devs[ivshmem_config->pci_devs_idx].path,
+ sizeof(ivshmem_config->pci_devs[ivshmem_config->pci_devs_idx].path),
+ "%s", path);
+
+ ivshmem_config->pci_devs_idx++;
+ }
+ /* failed to read */
+ else if (ret < 0) {
+ RTE_LOG(ERR, EAL, "Could not read IVSHMEM device: %s\n",
+ strerror(errno));
+ close(fd);
+ return -1;
+ }
+ /* not a DPDK device */
+ else
+ RTE_LOG(DEBUG, EAL, "Skipping non-DPDK IVSHMEM device\n");
+
+ /* close the BAR fd */
+ close(fd);
+ }
+ }
+ }
+
+ /* ivshmem_config is not NULL only if config was created and/or mapped */
+ if (ivshmem_config) {
+ if (map_all_segments() < 0) {
+ RTE_LOG(ERR, EAL, "Mapping IVSHMEM segments failed!\n");
+ return -1;
+ }
+ }
+ else {
+ RTE_LOG(DEBUG, EAL, "No IVSHMEM configuration found! \n");
+ }
+
+ return 0;
+}
+
+#endif
diff --git a/lib/librte_eal/linuxapp/eal/eal_lcore.c b/lib/librte_eal/linuxapp/eal/eal_lcore.c
new file mode 100644
index 00000000..de5b4260
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_lcore.c
@@ -0,0 +1,110 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <unistd.h>
+#include <limits.h>
+#include <string.h>
+#include <dirent.h>
+
+#include <rte_log.h>
+#include <rte_eal.h>
+#include <rte_lcore.h>
+#include <rte_common.h>
+#include <rte_string_fns.h>
+#include <rte_debug.h>
+
+#include "eal_private.h"
+#include "eal_filesystem.h"
+#include "eal_thread.h"
+
+#define SYS_CPU_DIR "/sys/devices/system/cpu/cpu%u"
+#define CORE_ID_FILE "topology/core_id"
+#define NUMA_NODE_PATH "/sys/devices/system/node"
+
+/* Check if a cpu is present by the presence of the cpu information for it */
+int
+eal_cpu_detected(unsigned lcore_id)
+{
+ char path[PATH_MAX];
+ int len = snprintf(path, sizeof(path), SYS_CPU_DIR
+ "/"CORE_ID_FILE, lcore_id);
+ if (len <= 0 || (unsigned)len >= sizeof(path))
+ return 0;
+ if (access(path, F_OK) != 0)
+ return 0;
+
+ return 1;
+}
+
+/*
+ * Get CPU socket id (NUMA node) for a logical core.
+ *
+ * This searches each nodeX directories in /sys for the symlink for the given
+ * lcore_id and returns the numa node where the lcore is found. If lcore is not
+ * found on any numa node, returns zero.
+ */
+unsigned
+eal_cpu_socket_id(unsigned lcore_id)
+{
+ unsigned socket;
+
+ for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) {
+ char path[PATH_MAX];
+
+ snprintf(path, sizeof(path), "%s/node%u/cpu%u", NUMA_NODE_PATH,
+ socket, lcore_id);
+ if (access(path, F_OK) == 0)
+ return socket;
+ }
+ return 0;
+}
+
+/* Get the cpu core id value from the /sys/.../cpuX core_id value */
+unsigned
+eal_cpu_core_id(unsigned lcore_id)
+{
+ char path[PATH_MAX];
+ unsigned long id;
+
+ int len = snprintf(path, sizeof(path), SYS_CPU_DIR "/%s", lcore_id, CORE_ID_FILE);
+ if (len <= 0 || (unsigned)len >= sizeof(path))
+ goto err;
+ if (eal_parse_sysfs_value(path, &id) != 0)
+ goto err;
+ return (unsigned)id;
+
+err:
+ RTE_LOG(ERR, EAL, "Error reading core id value from %s "
+ "for lcore %u - assuming core 0\n", SYS_CPU_DIR, lcore_id);
+ return 0;
+}
diff --git a/lib/librte_eal/linuxapp/eal/eal_log.c b/lib/librte_eal/linuxapp/eal/eal_log.c
new file mode 100644
index 00000000..0b133c3e
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_log.c
@@ -0,0 +1,146 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <syslog.h>
+#include <sys/queue.h>
+
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_eal.h>
+#include <rte_launch.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_spinlock.h>
+#include <rte_log.h>
+
+#include "eal_private.h"
+
+/*
+ * default log function, used once mempool (hence log history) is
+ * available
+ */
+static ssize_t
+console_log_write(__attribute__((unused)) void *c, const char *buf, size_t size)
+{
+ char copybuf[BUFSIZ + 1];
+ ssize_t ret;
+ uint32_t loglevel;
+
+ /* add this log in history */
+ rte_log_add_in_history(buf, size);
+
+ /* write on stdout */
+ ret = fwrite(buf, 1, size, stdout);
+ fflush(stdout);
+
+ /* truncate message if too big (should not happen) */
+ if (size > BUFSIZ)
+ size = BUFSIZ;
+
+ /* Syslog error levels are from 0 to 7, so subtract 1 to convert */
+ loglevel = rte_log_cur_msg_loglevel() - 1;
+ memcpy(copybuf, buf, size);
+ copybuf[size] = '\0';
+
+ /* write on syslog too */
+ syslog(loglevel, "%s", copybuf);
+
+ return ret;
+}
+
+static cookie_io_functions_t console_log_func = {
+ .write = console_log_write,
+};
+
+/*
+ * set the log to default function, called during eal init process,
+ * once memzones are available.
+ */
+int
+rte_eal_log_init(const char *id, int facility)
+{
+ FILE *log_stream;
+
+ log_stream = fopencookie(NULL, "w+", console_log_func);
+ if (log_stream == NULL)
+ return -1;
+
+ openlog(id, LOG_NDELAY | LOG_PID, facility);
+
+ if (rte_eal_common_log_init(log_stream) < 0)
+ return -1;
+
+ return 0;
+}
+
+/* early logs */
+
+/*
+ * early log function, used during boot when mempool (hence log
+ * history) is not available
+ */
+static ssize_t
+early_log_write(__attribute__((unused)) void *c, const char *buf, size_t size)
+{
+ ssize_t ret;
+ ret = fwrite(buf, size, 1, stdout);
+ fflush(stdout);
+ if (ret == 0)
+ return -1;
+ return ret;
+}
+
+static cookie_io_functions_t early_log_func = {
+ .write = early_log_write,
+};
+static FILE *early_log_stream;
+
+/*
+ * init the log library, called by rte_eal_init() to enable early
+ * logs
+ */
+int
+rte_eal_log_early_init(void)
+{
+ early_log_stream = fopencookie(NULL, "w+", early_log_func);
+ if (early_log_stream == NULL) {
+ printf("Cannot configure early_log_stream\n");
+ return -1;
+ }
+ rte_openlog_stream(early_log_stream);
+ return 0;
+}
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
new file mode 100644
index 00000000..5b9132c6
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -0,0 +1,1559 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+/* BSD LICENSE
+ *
+ * Copyright(c) 2013 6WIND.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define _FILE_OFFSET_BITS 64
+#include <errno.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <string.h>
+#include <stdarg.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/queue.h>
+#include <sys/file.h>
+#include <unistd.h>
+#include <limits.h>
+#include <errno.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_launch.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_common.h>
+#include <rte_string_fns.h>
+
+#include "eal_private.h"
+#include "eal_internal_cfg.h"
+#include "eal_filesystem.h"
+#include "eal_hugepages.h"
+
+#ifdef RTE_LIBRTE_XEN_DOM0
+int rte_xen_dom0_supported(void)
+{
+ return internal_config.xen_dom0_support;
+}
+#endif
+
+/**
+ * @file
+ * Huge page mapping under linux
+ *
+ * To reserve a big contiguous amount of memory, we use the hugepage
+ * feature of linux. For that, we need to have hugetlbfs mounted. This
+ * code will create many files in this directory (one per page) and
+ * map them in virtual memory. For each page, we will retrieve its
+ * physical address and remap it in order to have a virtual contiguous
+ * zone as well as a physical contiguous zone.
+ */
+
+static uint64_t baseaddr_offset;
+
+static unsigned proc_pagemap_readable;
+
+#define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space"
+
+static void
+test_proc_pagemap_readable(void)
+{
+ int fd = open("/proc/self/pagemap", O_RDONLY);
+
+ if (fd < 0) {
+ RTE_LOG(ERR, EAL,
+ "Cannot open /proc/self/pagemap: %s. "
+ "virt2phys address translation will not work\n",
+ strerror(errno));
+ return;
+ }
+
+ /* Is readable */
+ close(fd);
+ proc_pagemap_readable = 1;
+}
+
+/* Lock page in physical memory and prevent from swapping. */
+int
+rte_mem_lock_page(const void *virt)
+{
+ unsigned long virtual = (unsigned long)virt;
+ int page_size = getpagesize();
+ unsigned long aligned = (virtual & ~ (page_size - 1));
+ return mlock((void*)aligned, page_size);
+}
+
+/*
+ * Get physical address of any mapped virtual address in the current process.
+ */
+phys_addr_t
+rte_mem_virt2phy(const void *virtaddr)
+{
+ int fd;
+ uint64_t page, physaddr;
+ unsigned long virt_pfn;
+ int page_size;
+ off_t offset;
+
+ /* Cannot parse /proc/self/pagemap, no need to log errors everywhere */
+ if (!proc_pagemap_readable)
+ return RTE_BAD_PHYS_ADDR;
+
+ /* standard page size */
+ page_size = getpagesize();
+
+ fd = open("/proc/self/pagemap", O_RDONLY);
+ if (fd < 0) {
+ RTE_LOG(ERR, EAL, "%s(): cannot open /proc/self/pagemap: %s\n",
+ __func__, strerror(errno));
+ return RTE_BAD_PHYS_ADDR;
+ }
+
+ virt_pfn = (unsigned long)virtaddr / page_size;
+ offset = sizeof(uint64_t) * virt_pfn;
+ if (lseek(fd, offset, SEEK_SET) == (off_t) -1) {
+ RTE_LOG(ERR, EAL, "%s(): seek error in /proc/self/pagemap: %s\n",
+ __func__, strerror(errno));
+ close(fd);
+ return RTE_BAD_PHYS_ADDR;
+ }
+ if (read(fd, &page, sizeof(uint64_t)) < 0) {
+ RTE_LOG(ERR, EAL, "%s(): cannot read /proc/self/pagemap: %s\n",
+ __func__, strerror(errno));
+ close(fd);
+ return RTE_BAD_PHYS_ADDR;
+ }
+
+ /*
+ * the pfn (page frame number) are bits 0-54 (see
+ * pagemap.txt in linux Documentation)
+ */
+ physaddr = ((page & 0x7fffffffffffffULL) * page_size)
+ + ((unsigned long)virtaddr % page_size);
+ close(fd);
+ return physaddr;
+}
+
+/*
+ * For each hugepage in hugepg_tbl, fill the physaddr value. We find
+ * it by browsing the /proc/self/pagemap special file.
+ */
+static int
+find_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
+{
+ unsigned i;
+ phys_addr_t addr;
+
+ for (i = 0; i < hpi->num_pages[0]; i++) {
+ addr = rte_mem_virt2phy(hugepg_tbl[i].orig_va);
+ if (addr == RTE_BAD_PHYS_ADDR)
+ return -1;
+ hugepg_tbl[i].physaddr = addr;
+ }
+ return 0;
+}
+
+/*
+ * Check whether address-space layout randomization is enabled in
+ * the kernel. This is important for multi-process as it can prevent
+ * two processes mapping data to the same virtual address
+ * Returns:
+ * 0 - address space randomization disabled
+ * 1/2 - address space randomization enabled
+ * negative error code on error
+ */
+static int
+aslr_enabled(void)
+{
+ char c;
+ int retval, fd = open(RANDOMIZE_VA_SPACE_FILE, O_RDONLY);
+ if (fd < 0)
+ return -errno;
+ retval = read(fd, &c, 1);
+ close(fd);
+ if (retval < 0)
+ return -errno;
+ if (retval == 0)
+ return -EIO;
+ switch (c) {
+ case '0' : return 0;
+ case '1' : return 1;
+ case '2' : return 2;
+ default: return -EINVAL;
+ }
+}
+
+/*
+ * Try to mmap *size bytes in /dev/zero. If it is successful, return the
+ * pointer to the mmap'd area and keep *size unmodified. Else, retry
+ * with a smaller zone: decrease *size by hugepage_sz until it reaches
+ * 0. In this case, return NULL. Note: this function returns an address
+ * which is a multiple of hugepage size.
+ */
+static void *
+get_virtual_area(size_t *size, size_t hugepage_sz)
+{
+ void *addr;
+ int fd;
+ long aligned_addr;
+
+ if (internal_config.base_virtaddr != 0) {
+ addr = (void*) (uintptr_t) (internal_config.base_virtaddr +
+ baseaddr_offset);
+ }
+ else addr = NULL;
+
+ RTE_LOG(DEBUG, EAL, "Ask a virtual area of 0x%zx bytes\n", *size);
+
+ fd = open("/dev/zero", O_RDONLY);
+ if (fd < 0){
+ RTE_LOG(ERR, EAL, "Cannot open /dev/zero\n");
+ return NULL;
+ }
+ do {
+ addr = mmap(addr,
+ (*size) + hugepage_sz, PROT_READ, MAP_PRIVATE, fd, 0);
+ if (addr == MAP_FAILED)
+ *size -= hugepage_sz;
+ } while (addr == MAP_FAILED && *size > 0);
+
+ if (addr == MAP_FAILED) {
+ close(fd);
+ RTE_LOG(ERR, EAL, "Cannot get a virtual area: %s\n",
+ strerror(errno));
+ return NULL;
+ }
+
+ munmap(addr, (*size) + hugepage_sz);
+ close(fd);
+
+ /* align addr to a huge page size boundary */
+ aligned_addr = (long)addr;
+ aligned_addr += (hugepage_sz - 1);
+ aligned_addr &= (~(hugepage_sz - 1));
+ addr = (void *)(aligned_addr);
+
+ RTE_LOG(DEBUG, EAL, "Virtual area found at %p (size = 0x%zx)\n",
+ addr, *size);
+
+ /* increment offset */
+ baseaddr_offset += *size;
+
+ return addr;
+}
+
+/*
+ * Mmap all hugepages of hugepage table: it first open a file in
+ * hugetlbfs, then mmap() hugepage_sz data in it. If orig is set, the
+ * virtual address is stored in hugepg_tbl[i].orig_va, else it is stored
+ * in hugepg_tbl[i].final_va. The second mapping (when orig is 0) tries to
+ * map continguous physical blocks in contiguous virtual blocks.
+ */
+static int
+map_all_hugepages(struct hugepage_file *hugepg_tbl,
+ struct hugepage_info *hpi, int orig)
+{
+ int fd;
+ unsigned i;
+ void *virtaddr;
+ void *vma_addr = NULL;
+ size_t vma_len = 0;
+
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+ RTE_SET_USED(vma_len);
+#endif
+
+ for (i = 0; i < hpi->num_pages[0]; i++) {
+ uint64_t hugepage_sz = hpi->hugepage_sz;
+
+ if (orig) {
+ hugepg_tbl[i].file_id = i;
+ hugepg_tbl[i].size = hugepage_sz;
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+ eal_get_hugefile_temp_path(hugepg_tbl[i].filepath,
+ sizeof(hugepg_tbl[i].filepath), hpi->hugedir,
+ hugepg_tbl[i].file_id);
+#else
+ eal_get_hugefile_path(hugepg_tbl[i].filepath,
+ sizeof(hugepg_tbl[i].filepath), hpi->hugedir,
+ hugepg_tbl[i].file_id);
+#endif
+ hugepg_tbl[i].filepath[sizeof(hugepg_tbl[i].filepath) - 1] = '\0';
+ }
+#ifndef RTE_ARCH_64
+ /* for 32-bit systems, don't remap 1G and 16G pages, just reuse
+ * original map address as final map address.
+ */
+ else if ((hugepage_sz == RTE_PGSIZE_1G)
+ || (hugepage_sz == RTE_PGSIZE_16G)) {
+ hugepg_tbl[i].final_va = hugepg_tbl[i].orig_va;
+ hugepg_tbl[i].orig_va = NULL;
+ continue;
+ }
+#endif
+
+#ifndef RTE_EAL_SINGLE_FILE_SEGMENTS
+ else if (vma_len == 0) {
+ unsigned j, num_pages;
+
+ /* reserve a virtual area for next contiguous
+ * physical block: count the number of
+ * contiguous physical pages. */
+ for (j = i+1; j < hpi->num_pages[0] ; j++) {
+#ifdef RTE_ARCH_PPC_64
+ /* The physical addresses are sorted in
+ * descending order on PPC64 */
+ if (hugepg_tbl[j].physaddr !=
+ hugepg_tbl[j-1].physaddr - hugepage_sz)
+ break;
+#else
+ if (hugepg_tbl[j].physaddr !=
+ hugepg_tbl[j-1].physaddr + hugepage_sz)
+ break;
+#endif
+ }
+ num_pages = j - i;
+ vma_len = num_pages * hugepage_sz;
+
+ /* get the biggest virtual memory area up to
+ * vma_len. If it fails, vma_addr is NULL, so
+ * let the kernel provide the address. */
+ vma_addr = get_virtual_area(&vma_len, hpi->hugepage_sz);
+ if (vma_addr == NULL)
+ vma_len = hugepage_sz;
+ }
+#endif
+
+ /* try to create hugepage file */
+ fd = open(hugepg_tbl[i].filepath, O_CREAT | O_RDWR, 0755);
+ if (fd < 0) {
+ RTE_LOG(ERR, EAL, "%s(): open failed: %s\n", __func__,
+ strerror(errno));
+ return -1;
+ }
+
+ /* map the segment, and populate page tables,
+ * the kernel fills this segment with zeros */
+ virtaddr = mmap(vma_addr, hugepage_sz, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_POPULATE, fd, 0);
+ if (virtaddr == MAP_FAILED) {
+ RTE_LOG(ERR, EAL, "%s(): mmap failed: %s\n", __func__,
+ strerror(errno));
+ close(fd);
+ return -1;
+ }
+
+ if (orig) {
+ hugepg_tbl[i].orig_va = virtaddr;
+ }
+ else {
+ hugepg_tbl[i].final_va = virtaddr;
+ }
+
+ /* set shared flock on the file. */
+ if (flock(fd, LOCK_SH | LOCK_NB) == -1) {
+ RTE_LOG(ERR, EAL, "%s(): Locking file failed:%s \n",
+ __func__, strerror(errno));
+ close(fd);
+ return -1;
+ }
+
+ close(fd);
+
+ vma_addr = (char *)vma_addr + hugepage_sz;
+ vma_len -= hugepage_sz;
+ }
+ return 0;
+}
+
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+
+/*
+ * Remaps all hugepages into single file segments
+ */
+static int
+remap_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
+{
+ int fd;
+ unsigned i = 0, j, num_pages, page_idx = 0;
+ void *vma_addr = NULL, *old_addr = NULL, *page_addr = NULL;
+ size_t vma_len = 0;
+ size_t hugepage_sz = hpi->hugepage_sz;
+ size_t total_size, offset;
+ char filepath[MAX_HUGEPAGE_PATH];
+ phys_addr_t physaddr;
+ int socket;
+
+ while (i < hpi->num_pages[0]) {
+
+#ifndef RTE_ARCH_64
+ /* for 32-bit systems, don't remap 1G pages and 16G pages,
+ * just reuse original map address as final map address.
+ */
+ if ((hugepage_sz == RTE_PGSIZE_1G)
+ || (hugepage_sz == RTE_PGSIZE_16G)) {
+ hugepg_tbl[i].final_va = hugepg_tbl[i].orig_va;
+ hugepg_tbl[i].orig_va = NULL;
+ i++;
+ continue;
+ }
+#endif
+
+ /* reserve a virtual area for next contiguous
+ * physical block: count the number of
+ * contiguous physical pages. */
+ for (j = i+1; j < hpi->num_pages[0] ; j++) {
+#ifdef RTE_ARCH_PPC_64
+ /* The physical addresses are sorted in descending
+ * order on PPC64 */
+ if (hugepg_tbl[j].physaddr !=
+ hugepg_tbl[j-1].physaddr - hugepage_sz)
+ break;
+#else
+ if (hugepg_tbl[j].physaddr !=
+ hugepg_tbl[j-1].physaddr + hugepage_sz)
+ break;
+#endif
+ }
+ num_pages = j - i;
+ vma_len = num_pages * hugepage_sz;
+
+ socket = hugepg_tbl[i].socket_id;
+
+ /* get the biggest virtual memory area up to
+ * vma_len. If it fails, vma_addr is NULL, so
+ * let the kernel provide the address. */
+ vma_addr = get_virtual_area(&vma_len, hpi->hugepage_sz);
+
+ /* If we can't find a big enough virtual area, work out how many pages
+ * we are going to get */
+ if (vma_addr == NULL)
+ j = i + 1;
+ else if (vma_len != num_pages * hugepage_sz) {
+ num_pages = vma_len / hugepage_sz;
+ j = i + num_pages;
+
+ }
+
+ hugepg_tbl[page_idx].file_id = page_idx;
+ eal_get_hugefile_path(filepath,
+ sizeof(filepath),
+ hpi->hugedir,
+ hugepg_tbl[page_idx].file_id);
+
+ /* try to create hugepage file */
+ fd = open(filepath, O_CREAT | O_RDWR, 0755);
+ if (fd < 0) {
+ RTE_LOG(ERR, EAL, "%s(): open failed: %s\n", __func__, strerror(errno));
+ return -1;
+ }
+
+ total_size = 0;
+ for (;i < j; i++) {
+
+ /* unmap current segment */
+ if (total_size > 0)
+ munmap(vma_addr, total_size);
+
+ /* unmap original page */
+ munmap(hugepg_tbl[i].orig_va, hugepage_sz);
+ unlink(hugepg_tbl[i].filepath);
+
+ total_size += hugepage_sz;
+
+ old_addr = vma_addr;
+
+ /* map new, bigger segment, and populate page tables,
+ * the kernel fills this segment with zeros */
+ vma_addr = mmap(vma_addr, total_size,
+ PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, 0);
+
+ if (vma_addr == MAP_FAILED || vma_addr != old_addr) {
+ RTE_LOG(ERR, EAL, "%s(): mmap failed: %s\n", __func__, strerror(errno));
+ close(fd);
+ return -1;
+ }
+ }
+
+ /* set shared flock on the file. */
+ if (flock(fd, LOCK_SH | LOCK_NB) == -1) {
+ RTE_LOG(ERR, EAL, "%s(): Locking file failed:%s \n",
+ __func__, strerror(errno));
+ close(fd);
+ return -1;
+ }
+
+ snprintf(hugepg_tbl[page_idx].filepath, MAX_HUGEPAGE_PATH, "%s",
+ filepath);
+
+ physaddr = rte_mem_virt2phy(vma_addr);
+
+ if (physaddr == RTE_BAD_PHYS_ADDR)
+ return -1;
+
+ hugepg_tbl[page_idx].final_va = vma_addr;
+
+ hugepg_tbl[page_idx].physaddr = physaddr;
+
+ hugepg_tbl[page_idx].repeated = num_pages;
+
+ hugepg_tbl[page_idx].socket_id = socket;
+
+ close(fd);
+
+ /* verify the memory segment - that is, check that every VA corresponds
+ * to the physical address we expect to see
+ */
+ for (offset = 0; offset < vma_len; offset += hugepage_sz) {
+ uint64_t expected_physaddr;
+
+ expected_physaddr = hugepg_tbl[page_idx].physaddr + offset;
+ page_addr = RTE_PTR_ADD(vma_addr, offset);
+ physaddr = rte_mem_virt2phy(page_addr);
+
+ if (physaddr != expected_physaddr) {
+ RTE_LOG(ERR, EAL, "Segment sanity check failed: wrong physaddr "
+ "at %p (offset 0x%" PRIx64 ": 0x%" PRIx64
+ " (expected 0x%" PRIx64 ")\n",
+ page_addr, offset, physaddr, expected_physaddr);
+ return -1;
+ }
+ }
+
+ page_idx++;
+ }
+
+ /* zero out the rest */
+ memset(&hugepg_tbl[page_idx], 0, (hpi->num_pages[0] - page_idx) * sizeof(struct hugepage_file));
+ return page_idx;
+}
+#else/* RTE_EAL_SINGLE_FILE_SEGMENTS=n */
+
+/* Unmap all hugepages from original mapping */
+static int
+unmap_all_hugepages_orig(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
+{
+ unsigned i;
+ for (i = 0; i < hpi->num_pages[0]; i++) {
+ if (hugepg_tbl[i].orig_va) {
+ munmap(hugepg_tbl[i].orig_va, hpi->hugepage_sz);
+ hugepg_tbl[i].orig_va = NULL;
+ }
+ }
+ return 0;
+}
+#endif /* RTE_EAL_SINGLE_FILE_SEGMENTS */
+
+/*
+ * Parse /proc/self/numa_maps to get the NUMA socket ID for each huge
+ * page.
+ */
+static int
+find_numasocket(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
+{
+ int socket_id;
+ char *end, *nodestr;
+ unsigned i, hp_count = 0;
+ uint64_t virt_addr;
+ char buf[BUFSIZ];
+ char hugedir_str[PATH_MAX];
+ FILE *f;
+
+ f = fopen("/proc/self/numa_maps", "r");
+ if (f == NULL) {
+ RTE_LOG(NOTICE, EAL, "cannot open /proc/self/numa_maps,"
+ " consider that all memory is in socket_id 0\n");
+ return 0;
+ }
+
+ snprintf(hugedir_str, sizeof(hugedir_str),
+ "%s/%s", hpi->hugedir, internal_config.hugefile_prefix);
+
+ /* parse numa map */
+ while (fgets(buf, sizeof(buf), f) != NULL) {
+
+ /* ignore non huge page */
+ if (strstr(buf, " huge ") == NULL &&
+ strstr(buf, hugedir_str) == NULL)
+ continue;
+
+ /* get zone addr */
+ virt_addr = strtoull(buf, &end, 16);
+ if (virt_addr == 0 || end == buf) {
+ RTE_LOG(ERR, EAL, "%s(): error in numa_maps parsing\n", __func__);
+ goto error;
+ }
+
+ /* get node id (socket id) */
+ nodestr = strstr(buf, " N");
+ if (nodestr == NULL) {
+ RTE_LOG(ERR, EAL, "%s(): error in numa_maps parsing\n", __func__);
+ goto error;
+ }
+ nodestr += 2;
+ end = strstr(nodestr, "=");
+ if (end == NULL) {
+ RTE_LOG(ERR, EAL, "%s(): error in numa_maps parsing\n", __func__);
+ goto error;
+ }
+ end[0] = '\0';
+ end = NULL;
+
+ socket_id = strtoul(nodestr, &end, 0);
+ if ((nodestr[0] == '\0') || (end == NULL) || (*end != '\0')) {
+ RTE_LOG(ERR, EAL, "%s(): error in numa_maps parsing\n", __func__);
+ goto error;
+ }
+
+ /* if we find this page in our mappings, set socket_id */
+ for (i = 0; i < hpi->num_pages[0]; i++) {
+ void *va = (void *)(unsigned long)virt_addr;
+ if (hugepg_tbl[i].orig_va == va) {
+ hugepg_tbl[i].socket_id = socket_id;
+ hp_count++;
+ }
+ }
+ }
+
+ if (hp_count < hpi->num_pages[0])
+ goto error;
+
+ fclose(f);
+ return 0;
+
+error:
+ fclose(f);
+ return -1;
+}
+
+static int
+cmp_physaddr(const void *a, const void *b)
+{
+#ifndef RTE_ARCH_PPC_64
+ const struct hugepage_file *p1 = (const struct hugepage_file *)a;
+ const struct hugepage_file *p2 = (const struct hugepage_file *)b;
+#else
+ /* PowerPC needs memory sorted in reverse order from x86 */
+ const struct hugepage_file *p1 = (const struct hugepage_file *)b;
+ const struct hugepage_file *p2 = (const struct hugepage_file *)a;
+#endif
+ if (p1->physaddr < p2->physaddr)
+ return -1;
+ else if (p1->physaddr > p2->physaddr)
+ return 1;
+ else
+ return 0;
+}
+
+/*
+ * Uses mmap to create a shared memory area for storage of data
+ * Used in this file to store the hugepage file map on disk
+ */
+static void *
+create_shared_memory(const char *filename, const size_t mem_size)
+{
+ void *retval;
+ int fd = open(filename, O_CREAT | O_RDWR, 0666);
+ if (fd < 0)
+ return NULL;
+ if (ftruncate(fd, mem_size) < 0) {
+ close(fd);
+ return NULL;
+ }
+ retval = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ close(fd);
+ return retval;
+}
+
+/*
+ * this copies *active* hugepages from one hugepage table to another.
+ * destination is typically the shared memory.
+ */
+static int
+copy_hugepages_to_shared_mem(struct hugepage_file * dst, int dest_size,
+ const struct hugepage_file * src, int src_size)
+{
+ int src_pos, dst_pos = 0;
+
+ for (src_pos = 0; src_pos < src_size; src_pos++) {
+ if (src[src_pos].final_va != NULL) {
+ /* error on overflow attempt */
+ if (dst_pos == dest_size)
+ return -1;
+ memcpy(&dst[dst_pos], &src[src_pos], sizeof(struct hugepage_file));
+ dst_pos++;
+ }
+ }
+ return 0;
+}
+
+static int
+unlink_hugepage_files(struct hugepage_file *hugepg_tbl,
+ unsigned num_hp_info)
+{
+ unsigned socket, size;
+ int page, nrpages = 0;
+
+ /* get total number of hugepages */
+ for (size = 0; size < num_hp_info; size++)
+ for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++)
+ nrpages +=
+ internal_config.hugepage_info[size].num_pages[socket];
+
+ for (page = 0; page < nrpages; page++) {
+ struct hugepage_file *hp = &hugepg_tbl[page];
+
+ if (hp->final_va != NULL && unlink(hp->filepath)) {
+ RTE_LOG(WARNING, EAL, "%s(): Removing %s failed: %s\n",
+ __func__, hp->filepath, strerror(errno));
+ }
+ }
+ return 0;
+}
+
+/*
+ * unmaps hugepages that are not going to be used. since we originally allocate
+ * ALL hugepages (not just those we need), additional unmapping needs to be done.
+ */
+static int
+unmap_unneeded_hugepages(struct hugepage_file *hugepg_tbl,
+ struct hugepage_info *hpi,
+ unsigned num_hp_info)
+{
+ unsigned socket, size;
+ int page, nrpages = 0;
+
+ /* get total number of hugepages */
+ for (size = 0; size < num_hp_info; size++)
+ for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++)
+ nrpages += internal_config.hugepage_info[size].num_pages[socket];
+
+ for (size = 0; size < num_hp_info; size++) {
+ for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) {
+ unsigned pages_found = 0;
+
+ /* traverse until we have unmapped all the unused pages */
+ for (page = 0; page < nrpages; page++) {
+ struct hugepage_file *hp = &hugepg_tbl[page];
+
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+ /* if this page was already cleared */
+ if (hp->final_va == NULL)
+ continue;
+#endif
+
+ /* find a page that matches the criteria */
+ if ((hp->size == hpi[size].hugepage_sz) &&
+ (hp->socket_id == (int) socket)) {
+
+ /* if we skipped enough pages, unmap the rest */
+ if (pages_found == hpi[size].num_pages[socket]) {
+ uint64_t unmap_len;
+
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+ unmap_len = hp->size * hp->repeated;
+#else
+ unmap_len = hp->size;
+#endif
+
+ /* get start addr and len of the remaining segment */
+ munmap(hp->final_va, (size_t) unmap_len);
+
+ hp->final_va = NULL;
+ if (unlink(hp->filepath) == -1) {
+ RTE_LOG(ERR, EAL, "%s(): Removing %s failed: %s\n",
+ __func__, hp->filepath, strerror(errno));
+ return -1;
+ }
+ }
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+ /* else, check how much do we need to map */
+ else {
+ int nr_pg_left =
+ hpi[size].num_pages[socket] - pages_found;
+
+ /* if we need enough memory to fit into the segment */
+ if (hp->repeated <= nr_pg_left) {
+ pages_found += hp->repeated;
+ }
+ /* truncate the segment */
+ else {
+ uint64_t final_size = nr_pg_left * hp->size;
+ uint64_t seg_size = hp->repeated * hp->size;
+
+ void * unmap_va = RTE_PTR_ADD(hp->final_va,
+ final_size);
+ int fd;
+
+ munmap(unmap_va, seg_size - final_size);
+
+ fd = open(hp->filepath, O_RDWR);
+ if (fd < 0) {
+ RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
+ hp->filepath, strerror(errno));
+ return -1;
+ }
+ if (ftruncate(fd, final_size) < 0) {
+ RTE_LOG(ERR, EAL, "Cannot truncate %s: %s\n",
+ hp->filepath, strerror(errno));
+ return -1;
+ }
+ close(fd);
+
+ pages_found += nr_pg_left;
+ hp->repeated = nr_pg_left;
+ }
+ }
+#else
+ /* else, lock the page and skip */
+ else
+ pages_found++;
+#endif
+
+ } /* match page */
+ } /* foreach page */
+ } /* foreach socket */
+ } /* foreach pagesize */
+
+ return 0;
+}
+
+static inline uint64_t
+get_socket_mem_size(int socket)
+{
+ uint64_t size = 0;
+ unsigned i;
+
+ for (i = 0; i < internal_config.num_hugepage_sizes; i++){
+ struct hugepage_info *hpi = &internal_config.hugepage_info[i];
+ if (hpi->hugedir != NULL)
+ size += hpi->hugepage_sz * hpi->num_pages[socket];
+ }
+
+ return size;
+}
+
+/*
+ * This function is a NUMA-aware equivalent of calc_num_pages.
+ * It takes in the list of hugepage sizes and the
+ * number of pages thereof, and calculates the best number of
+ * pages of each size to fulfill the request for <memory> ram
+ */
+static int
+calc_num_pages_per_socket(uint64_t * memory,
+ struct hugepage_info *hp_info,
+ struct hugepage_info *hp_used,
+ unsigned num_hp_info)
+{
+ unsigned socket, j, i = 0;
+ unsigned requested, available;
+ int total_num_pages = 0;
+ uint64_t remaining_mem, cur_mem;
+ uint64_t total_mem = internal_config.memory;
+
+ if (num_hp_info == 0)
+ return -1;
+
+ /* if specific memory amounts per socket weren't requested */
+ if (internal_config.force_sockets == 0) {
+ int cpu_per_socket[RTE_MAX_NUMA_NODES];
+ size_t default_size, total_size;
+ unsigned lcore_id;
+
+ /* Compute number of cores per socket */
+ memset(cpu_per_socket, 0, sizeof(cpu_per_socket));
+ RTE_LCORE_FOREACH(lcore_id) {
+ cpu_per_socket[rte_lcore_to_socket_id(lcore_id)]++;
+ }
+
+ /*
+ * Automatically spread requested memory amongst detected sockets according
+ * to number of cores from cpu mask present on each socket
+ */
+ total_size = internal_config.memory;
+ for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_size != 0; socket++) {
+
+ /* Set memory amount per socket */
+ default_size = (internal_config.memory * cpu_per_socket[socket])
+ / rte_lcore_count();
+
+ /* Limit to maximum available memory on socket */
+ default_size = RTE_MIN(default_size, get_socket_mem_size(socket));
+
+ /* Update sizes */
+ memory[socket] = default_size;
+ total_size -= default_size;
+ }
+
+ /*
+ * If some memory is remaining, try to allocate it by getting all
+ * available memory from sockets, one after the other
+ */
+ for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_size != 0; socket++) {
+ /* take whatever is available */
+ default_size = RTE_MIN(get_socket_mem_size(socket) - memory[socket],
+ total_size);
+
+ /* Update sizes */
+ memory[socket] += default_size;
+ total_size -= default_size;
+ }
+ }
+
+ for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_mem != 0; socket++) {
+ /* skips if the memory on specific socket wasn't requested */
+ for (i = 0; i < num_hp_info && memory[socket] != 0; i++){
+ hp_used[i].hugedir = hp_info[i].hugedir;
+ hp_used[i].num_pages[socket] = RTE_MIN(
+ memory[socket] / hp_info[i].hugepage_sz,
+ hp_info[i].num_pages[socket]);
+
+ cur_mem = hp_used[i].num_pages[socket] *
+ hp_used[i].hugepage_sz;
+
+ memory[socket] -= cur_mem;
+ total_mem -= cur_mem;
+
+ total_num_pages += hp_used[i].num_pages[socket];
+
+ /* check if we have met all memory requests */
+ if (memory[socket] == 0)
+ break;
+
+ /* check if we have any more pages left at this size, if so
+ * move on to next size */
+ if (hp_used[i].num_pages[socket] == hp_info[i].num_pages[socket])
+ continue;
+ /* At this point we know that there are more pages available that are
+ * bigger than the memory we want, so lets see if we can get enough
+ * from other page sizes.
+ */
+ remaining_mem = 0;
+ for (j = i+1; j < num_hp_info; j++)
+ remaining_mem += hp_info[j].hugepage_sz *
+ hp_info[j].num_pages[socket];
+
+ /* is there enough other memory, if not allocate another page and quit */
+ if (remaining_mem < memory[socket]){
+ cur_mem = RTE_MIN(memory[socket],
+ hp_info[i].hugepage_sz);
+ memory[socket] -= cur_mem;
+ total_mem -= cur_mem;
+ hp_used[i].num_pages[socket]++;
+ total_num_pages++;
+ break; /* we are done with this socket*/
+ }
+ }
+ /* if we didn't satisfy all memory requirements per socket */
+ if (memory[socket] > 0) {
+ /* to prevent icc errors */
+ requested = (unsigned) (internal_config.socket_mem[socket] /
+ 0x100000);
+ available = requested -
+ ((unsigned) (memory[socket] / 0x100000));
+ RTE_LOG(ERR, EAL, "Not enough memory available on socket %u! "
+ "Requested: %uMB, available: %uMB\n", socket,
+ requested, available);
+ return -1;
+ }
+ }
+
+ /* if we didn't satisfy total memory requirements */
+ if (total_mem > 0) {
+ requested = (unsigned) (internal_config.memory / 0x100000);
+ available = requested - (unsigned) (total_mem / 0x100000);
+ RTE_LOG(ERR, EAL, "Not enough memory available! Requested: %uMB,"
+ " available: %uMB\n", requested, available);
+ return -1;
+ }
+ return total_num_pages;
+}
+
+/*
+ * Prepare physical memory mapping: fill configuration structure with
+ * these infos, return 0 on success.
+ * 1. map N huge pages in separate files in hugetlbfs
+ * 2. find associated physical addr
+ * 3. find associated NUMA socket ID
+ * 4. sort all huge pages by physical address
+ * 5. remap these N huge pages in the correct order
+ * 6. unmap the first mapping
+ * 7. fill memsegs in configuration with contiguous zones
+ */
+int
+rte_eal_hugepage_init(void)
+{
+ struct rte_mem_config *mcfg;
+ struct hugepage_file *hugepage, *tmp_hp = NULL;
+ struct hugepage_info used_hp[MAX_HUGEPAGE_SIZES];
+
+ uint64_t memory[RTE_MAX_NUMA_NODES];
+
+ unsigned hp_offset;
+ int i, j, new_memseg;
+ int nr_hugefiles, nr_hugepages = 0;
+ void *addr;
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+ int new_pages_count[MAX_HUGEPAGE_SIZES];
+#endif
+
+ test_proc_pagemap_readable();
+
+ memset(used_hp, 0, sizeof(used_hp));
+
+ /* get pointer to global configuration */
+ mcfg = rte_eal_get_configuration()->mem_config;
+
+ /* hugetlbfs can be disabled */
+ if (internal_config.no_hugetlbfs) {
+ addr = mmap(NULL, internal_config.memory, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
+ if (addr == MAP_FAILED) {
+ RTE_LOG(ERR, EAL, "%s: mmap() failed: %s\n", __func__,
+ strerror(errno));
+ return -1;
+ }
+ mcfg->memseg[0].phys_addr = (phys_addr_t)(uintptr_t)addr;
+ mcfg->memseg[0].addr = addr;
+ mcfg->memseg[0].hugepage_sz = RTE_PGSIZE_4K;
+ mcfg->memseg[0].len = internal_config.memory;
+ mcfg->memseg[0].socket_id = 0;
+ return 0;
+ }
+
+/* check if app runs on Xen Dom0 */
+ if (internal_config.xen_dom0_support) {
+#ifdef RTE_LIBRTE_XEN_DOM0
+ /* use dom0_mm kernel driver to init memory */
+ if (rte_xen_dom0_memory_init() < 0)
+ return -1;
+ else
+ return 0;
+#endif
+ }
+
+ /* calculate total number of hugepages available. at this point we haven't
+ * yet started sorting them so they all are on socket 0 */
+ for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) {
+ /* meanwhile, also initialize used_hp hugepage sizes in used_hp */
+ used_hp[i].hugepage_sz = internal_config.hugepage_info[i].hugepage_sz;
+
+ nr_hugepages += internal_config.hugepage_info[i].num_pages[0];
+ }
+
+ /*
+ * allocate a memory area for hugepage table.
+ * this isn't shared memory yet. due to the fact that we need some
+ * processing done on these pages, shared memory will be created
+ * at a later stage.
+ */
+ tmp_hp = malloc(nr_hugepages * sizeof(struct hugepage_file));
+ if (tmp_hp == NULL)
+ goto fail;
+
+ memset(tmp_hp, 0, nr_hugepages * sizeof(struct hugepage_file));
+
+ hp_offset = 0; /* where we start the current page size entries */
+
+ /* map all hugepages and sort them */
+ for (i = 0; i < (int)internal_config.num_hugepage_sizes; i ++){
+ struct hugepage_info *hpi;
+
+ /*
+ * we don't yet mark hugepages as used at this stage, so
+ * we just map all hugepages available to the system
+ * all hugepages are still located on socket 0
+ */
+ hpi = &internal_config.hugepage_info[i];
+
+ if (hpi->num_pages[0] == 0)
+ continue;
+
+ /* map all hugepages available */
+ if (map_all_hugepages(&tmp_hp[hp_offset], hpi, 1) < 0){
+ RTE_LOG(DEBUG, EAL, "Failed to mmap %u MB hugepages\n",
+ (unsigned)(hpi->hugepage_sz / 0x100000));
+ goto fail;
+ }
+
+ /* find physical addresses and sockets for each hugepage */
+ if (find_physaddrs(&tmp_hp[hp_offset], hpi) < 0){
+ RTE_LOG(DEBUG, EAL, "Failed to find phys addr for %u MB pages\n",
+ (unsigned)(hpi->hugepage_sz / 0x100000));
+ goto fail;
+ }
+
+ if (find_numasocket(&tmp_hp[hp_offset], hpi) < 0){
+ RTE_LOG(DEBUG, EAL, "Failed to find NUMA socket for %u MB pages\n",
+ (unsigned)(hpi->hugepage_sz / 0x100000));
+ goto fail;
+ }
+
+ qsort(&tmp_hp[hp_offset], hpi->num_pages[0],
+ sizeof(struct hugepage_file), cmp_physaddr);
+
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+ /* remap all hugepages into single file segments */
+ new_pages_count[i] = remap_all_hugepages(&tmp_hp[hp_offset], hpi);
+ if (new_pages_count[i] < 0){
+ RTE_LOG(DEBUG, EAL, "Failed to remap %u MB pages\n",
+ (unsigned)(hpi->hugepage_sz / 0x100000));
+ goto fail;
+ }
+
+ /* we have processed a num of hugepages of this size, so inc offset */
+ hp_offset += new_pages_count[i];
+#else
+ /* remap all hugepages */
+ if (map_all_hugepages(&tmp_hp[hp_offset], hpi, 0) < 0){
+ RTE_LOG(DEBUG, EAL, "Failed to remap %u MB pages\n",
+ (unsigned)(hpi->hugepage_sz / 0x100000));
+ goto fail;
+ }
+
+ /* unmap original mappings */
+ if (unmap_all_hugepages_orig(&tmp_hp[hp_offset], hpi) < 0)
+ goto fail;
+
+ /* we have processed a num of hugepages of this size, so inc offset */
+ hp_offset += hpi->num_pages[0];
+#endif
+ }
+
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+ nr_hugefiles = 0;
+ for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) {
+ nr_hugefiles += new_pages_count[i];
+ }
+#else
+ nr_hugefiles = nr_hugepages;
+#endif
+
+
+ /* clean out the numbers of pages */
+ for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++)
+ for (j = 0; j < RTE_MAX_NUMA_NODES; j++)
+ internal_config.hugepage_info[i].num_pages[j] = 0;
+
+ /* get hugepages for each socket */
+ for (i = 0; i < nr_hugefiles; i++) {
+ int socket = tmp_hp[i].socket_id;
+
+ /* find a hugepage info with right size and increment num_pages */
+ const int nb_hpsizes = RTE_MIN(MAX_HUGEPAGE_SIZES,
+ (int)internal_config.num_hugepage_sizes);
+ for (j = 0; j < nb_hpsizes; j++) {
+ if (tmp_hp[i].size ==
+ internal_config.hugepage_info[j].hugepage_sz) {
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+ internal_config.hugepage_info[j].num_pages[socket] +=
+ tmp_hp[i].repeated;
+#else
+ internal_config.hugepage_info[j].num_pages[socket]++;
+#endif
+ }
+ }
+ }
+
+ /* make a copy of socket_mem, needed for number of pages calculation */
+ for (i = 0; i < RTE_MAX_NUMA_NODES; i++)
+ memory[i] = internal_config.socket_mem[i];
+
+ /* calculate final number of pages */
+ nr_hugepages = calc_num_pages_per_socket(memory,
+ internal_config.hugepage_info, used_hp,
+ internal_config.num_hugepage_sizes);
+
+ /* error if not enough memory available */
+ if (nr_hugepages < 0)
+ goto fail;
+
+ /* reporting in! */
+ for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) {
+ for (j = 0; j < RTE_MAX_NUMA_NODES; j++) {
+ if (used_hp[i].num_pages[j] > 0) {
+ RTE_LOG(DEBUG, EAL,
+ "Requesting %u pages of size %uMB"
+ " from socket %i\n",
+ used_hp[i].num_pages[j],
+ (unsigned)
+ (used_hp[i].hugepage_sz / 0x100000),
+ j);
+ }
+ }
+ }
+
+ /* create shared memory */
+ hugepage = create_shared_memory(eal_hugepage_info_path(),
+ nr_hugefiles * sizeof(struct hugepage_file));
+
+ if (hugepage == NULL) {
+ RTE_LOG(ERR, EAL, "Failed to create shared memory!\n");
+ goto fail;
+ }
+ memset(hugepage, 0, nr_hugefiles * sizeof(struct hugepage_file));
+
+ /*
+ * unmap pages that we won't need (looks at used_hp).
+ * also, sets final_va to NULL on pages that were unmapped.
+ */
+ if (unmap_unneeded_hugepages(tmp_hp, used_hp,
+ internal_config.num_hugepage_sizes) < 0) {
+ RTE_LOG(ERR, EAL, "Unmapping and locking hugepages failed!\n");
+ goto fail;
+ }
+
+ /*
+ * copy stuff from malloc'd hugepage* to the actual shared memory.
+ * this procedure only copies those hugepages that have final_va
+ * not NULL. has overflow protection.
+ */
+ if (copy_hugepages_to_shared_mem(hugepage, nr_hugefiles,
+ tmp_hp, nr_hugefiles) < 0) {
+ RTE_LOG(ERR, EAL, "Copying tables to shared memory failed!\n");
+ goto fail;
+ }
+
+ /* free the hugepage backing files */
+ if (internal_config.hugepage_unlink &&
+ unlink_hugepage_files(tmp_hp, internal_config.num_hugepage_sizes) < 0) {
+ RTE_LOG(ERR, EAL, "Unlinking hugepage files failed!\n");
+ goto fail;
+ }
+
+ /* free the temporary hugepage table */
+ free(tmp_hp);
+ tmp_hp = NULL;
+
+ /* find earliest free memseg - this is needed because in case of IVSHMEM,
+ * segments might have already been initialized */
+ for (j = 0; j < RTE_MAX_MEMSEG; j++)
+ if (mcfg->memseg[j].addr == NULL) {
+ /* move to previous segment and exit loop */
+ j--;
+ break;
+ }
+
+ for (i = 0; i < nr_hugefiles; i++) {
+ new_memseg = 0;
+
+ /* if this is a new section, create a new memseg */
+ if (i == 0)
+ new_memseg = 1;
+ else if (hugepage[i].socket_id != hugepage[i-1].socket_id)
+ new_memseg = 1;
+ else if (hugepage[i].size != hugepage[i-1].size)
+ new_memseg = 1;
+
+#ifdef RTE_ARCH_PPC_64
+ /* On PPC64 architecture, the mmap always start from higher
+ * virtual address to lower address. Here, both the physical
+ * address and virtual address are in descending order */
+ else if ((hugepage[i-1].physaddr - hugepage[i].physaddr) !=
+ hugepage[i].size)
+ new_memseg = 1;
+ else if (((unsigned long)hugepage[i-1].final_va -
+ (unsigned long)hugepage[i].final_va) != hugepage[i].size)
+ new_memseg = 1;
+#else
+ else if ((hugepage[i].physaddr - hugepage[i-1].physaddr) !=
+ hugepage[i].size)
+ new_memseg = 1;
+ else if (((unsigned long)hugepage[i].final_va -
+ (unsigned long)hugepage[i-1].final_va) != hugepage[i].size)
+ new_memseg = 1;
+#endif
+
+ if (new_memseg) {
+ j += 1;
+ if (j == RTE_MAX_MEMSEG)
+ break;
+
+ mcfg->memseg[j].phys_addr = hugepage[i].physaddr;
+ mcfg->memseg[j].addr = hugepage[i].final_va;
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+ mcfg->memseg[j].len = hugepage[i].size * hugepage[i].repeated;
+#else
+ mcfg->memseg[j].len = hugepage[i].size;
+#endif
+ mcfg->memseg[j].socket_id = hugepage[i].socket_id;
+ mcfg->memseg[j].hugepage_sz = hugepage[i].size;
+ }
+ /* continuation of previous memseg */
+ else {
+#ifdef RTE_ARCH_PPC_64
+ /* Use the phy and virt address of the last page as segment
+ * address for IBM Power architecture */
+ mcfg->memseg[j].phys_addr = hugepage[i].physaddr;
+ mcfg->memseg[j].addr = hugepage[i].final_va;
+#endif
+ mcfg->memseg[j].len += mcfg->memseg[j].hugepage_sz;
+ }
+ hugepage[i].memseg_id = j;
+ }
+
+ if (i < nr_hugefiles) {
+ RTE_LOG(ERR, EAL, "Can only reserve %d pages "
+ "from %d requested\n"
+ "Current %s=%d is not enough\n"
+ "Please either increase it or request less amount "
+ "of memory.\n",
+ i, nr_hugefiles, RTE_STR(CONFIG_RTE_MAX_MEMSEG),
+ RTE_MAX_MEMSEG);
+ return -ENOMEM;
+ }
+
+ return 0;
+
+fail:
+ free(tmp_hp);
+ return -1;
+}
+
+/*
+ * uses fstat to report the size of a file on disk
+ */
+static off_t
+getFileSize(int fd)
+{
+ struct stat st;
+ if (fstat(fd, &st) < 0)
+ return 0;
+ return st.st_size;
+}
+
+/*
+ * This creates the memory mappings in the secondary process to match that of
+ * the server process. It goes through each memory segment in the DPDK runtime
+ * configuration and finds the hugepages which form that segment, mapping them
+ * in order to form a contiguous block in the virtual memory space
+ */
+int
+rte_eal_hugepage_attach(void)
+{
+ const struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ const struct hugepage_file *hp = NULL;
+ unsigned num_hp = 0;
+ unsigned i, s = 0; /* s used to track the segment number */
+ off_t size;
+ int fd, fd_zero = -1, fd_hugepage = -1;
+
+ if (aslr_enabled() > 0) {
+ RTE_LOG(WARNING, EAL, "WARNING: Address Space Layout Randomization "
+ "(ASLR) is enabled in the kernel.\n");
+ RTE_LOG(WARNING, EAL, " This may cause issues with mapping memory "
+ "into secondary processes\n");
+ }
+
+ test_proc_pagemap_readable();
+
+ if (internal_config.xen_dom0_support) {
+#ifdef RTE_LIBRTE_XEN_DOM0
+ if (rte_xen_dom0_memory_attach() < 0) {
+ RTE_LOG(ERR, EAL,"Failed to attach memory setments of primay "
+ "process\n");
+ return -1;
+ }
+ return 0;
+#endif
+ }
+
+ fd_zero = open("/dev/zero", O_RDONLY);
+ if (fd_zero < 0) {
+ RTE_LOG(ERR, EAL, "Could not open /dev/zero\n");
+ goto error;
+ }
+ fd_hugepage = open(eal_hugepage_info_path(), O_RDONLY);
+ if (fd_hugepage < 0) {
+ RTE_LOG(ERR, EAL, "Could not open %s\n", eal_hugepage_info_path());
+ goto error;
+ }
+
+ /* map all segments into memory to make sure we get the addrs */
+ for (s = 0; s < RTE_MAX_MEMSEG; ++s) {
+ void *base_addr;
+
+ /*
+ * the first memory segment with len==0 is the one that
+ * follows the last valid segment.
+ */
+ if (mcfg->memseg[s].len == 0)
+ break;
+
+#ifdef RTE_LIBRTE_IVSHMEM
+ /*
+ * if segment has ioremap address set, it's an IVSHMEM segment and
+ * doesn't need mapping as it was already mapped earlier
+ */
+ if (mcfg->memseg[s].ioremap_addr != 0)
+ continue;
+#endif
+
+ /*
+ * fdzero is mmapped to get a contiguous block of virtual
+ * addresses of the appropriate memseg size.
+ * use mmap to get identical addresses as the primary process.
+ */
+ base_addr = mmap(mcfg->memseg[s].addr, mcfg->memseg[s].len,
+ PROT_READ, MAP_PRIVATE, fd_zero, 0);
+ if (base_addr == MAP_FAILED ||
+ base_addr != mcfg->memseg[s].addr) {
+ RTE_LOG(ERR, EAL, "Could not mmap %llu bytes "
+ "in /dev/zero to requested address [%p]: '%s'\n",
+ (unsigned long long)mcfg->memseg[s].len,
+ mcfg->memseg[s].addr, strerror(errno));
+ if (aslr_enabled() > 0) {
+ RTE_LOG(ERR, EAL, "It is recommended to "
+ "disable ASLR in the kernel "
+ "and retry running both primary "
+ "and secondary processes\n");
+ }
+ goto error;
+ }
+ }
+
+ size = getFileSize(fd_hugepage);
+ hp = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd_hugepage, 0);
+ if (hp == NULL) {
+ RTE_LOG(ERR, EAL, "Could not mmap %s\n", eal_hugepage_info_path());
+ goto error;
+ }
+
+ num_hp = size / sizeof(struct hugepage_file);
+ RTE_LOG(DEBUG, EAL, "Analysing %u files\n", num_hp);
+
+ s = 0;
+ while (s < RTE_MAX_MEMSEG && mcfg->memseg[s].len > 0){
+ void *addr, *base_addr;
+ uintptr_t offset = 0;
+ size_t mapping_size;
+#ifdef RTE_LIBRTE_IVSHMEM
+ /*
+ * if segment has ioremap address set, it's an IVSHMEM segment and
+ * doesn't need mapping as it was already mapped earlier
+ */
+ if (mcfg->memseg[s].ioremap_addr != 0) {
+ s++;
+ continue;
+ }
+#endif
+ /*
+ * free previously mapped memory so we can map the
+ * hugepages into the space
+ */
+ base_addr = mcfg->memseg[s].addr;
+ munmap(base_addr, mcfg->memseg[s].len);
+
+ /* find the hugepages for this segment and map them
+ * we don't need to worry about order, as the server sorted the
+ * entries before it did the second mmap of them */
+ for (i = 0; i < num_hp && offset < mcfg->memseg[s].len; i++){
+ if (hp[i].memseg_id == (int)s){
+ fd = open(hp[i].filepath, O_RDWR);
+ if (fd < 0) {
+ RTE_LOG(ERR, EAL, "Could not open %s\n",
+ hp[i].filepath);
+ goto error;
+ }
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+ mapping_size = hp[i].size * hp[i].repeated;
+#else
+ mapping_size = hp[i].size;
+#endif
+ addr = mmap(RTE_PTR_ADD(base_addr, offset),
+ mapping_size, PROT_READ | PROT_WRITE,
+ MAP_SHARED, fd, 0);
+ close(fd); /* close file both on success and on failure */
+ if (addr == MAP_FAILED ||
+ addr != RTE_PTR_ADD(base_addr, offset)) {
+ RTE_LOG(ERR, EAL, "Could not mmap %s\n",
+ hp[i].filepath);
+ goto error;
+ }
+ offset+=mapping_size;
+ }
+ }
+ RTE_LOG(DEBUG, EAL, "Mapped segment %u of size 0x%llx\n", s,
+ (unsigned long long)mcfg->memseg[s].len);
+ s++;
+ }
+ /* unmap the hugepage config file, since we are done using it */
+ munmap((void *)(uintptr_t)hp, size);
+ close(fd_zero);
+ close(fd_hugepage);
+ return 0;
+
+error:
+ if (fd_zero >= 0)
+ close(fd_zero);
+ if (fd_hugepage >= 0)
+ close(fd_hugepage);
+ return -1;
+}
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c
new file mode 100644
index 00000000..dbf12a84
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -0,0 +1,762 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+#include <dirent.h>
+
+#include <rte_log.h>
+#include <rte_pci.h>
+#include <rte_eal_memconfig.h>
+#include <rte_malloc.h>
+#include <rte_devargs.h>
+#include <rte_memcpy.h>
+
+#include "eal_filesystem.h"
+#include "eal_private.h"
+#include "eal_pci_init.h"
+
+/**
+ * @file
+ * PCI probing under linux
+ *
+ * This code is used to simulate a PCI probe by parsing information in sysfs.
+ * When a registered device matches a driver, it is then initialized with
+ * IGB_UIO driver (or doesn't initialize, if the device wasn't bound to it).
+ */
+
+/* unbind kernel driver for this device */
+int
+pci_unbind_kernel_driver(struct rte_pci_device *dev)
+{
+ int n;
+ FILE *f;
+ char filename[PATH_MAX];
+ char buf[BUFSIZ];
+ struct rte_pci_addr *loc = &dev->addr;
+
+ /* open /sys/bus/pci/devices/AAAA:BB:CC.D/driver */
+ snprintf(filename, sizeof(filename),
+ SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/driver/unbind",
+ loc->domain, loc->bus, loc->devid, loc->function);
+
+ f = fopen(filename, "w");
+ if (f == NULL) /* device was not bound */
+ return 0;
+
+ n = snprintf(buf, sizeof(buf), PCI_PRI_FMT "\n",
+ loc->domain, loc->bus, loc->devid, loc->function);
+ if ((n < 0) || (n >= (int)sizeof(buf))) {
+ RTE_LOG(ERR, EAL, "%s(): snprintf failed\n", __func__);
+ goto error;
+ }
+ if (fwrite(buf, n, 1, f) == 0) {
+ RTE_LOG(ERR, EAL, "%s(): could not write to %s\n", __func__,
+ filename);
+ goto error;
+ }
+
+ fclose(f);
+ return 0;
+
+error:
+ fclose(f);
+ return -1;
+}
+
+static int
+pci_get_kernel_driver_by_path(const char *filename, char *dri_name)
+{
+ int count;
+ char path[PATH_MAX];
+ char *name;
+
+ if (!filename || !dri_name)
+ return -1;
+
+ count = readlink(filename, path, PATH_MAX);
+ if (count >= PATH_MAX)
+ return -1;
+
+ /* For device does not have a driver */
+ if (count < 0)
+ return 1;
+
+ path[count] = '\0';
+
+ name = strrchr(path, '/');
+ if (name) {
+ strncpy(dri_name, name + 1, strlen(name + 1) + 1);
+ return 0;
+ }
+
+ return -1;
+}
+
+/* Map pci device */
+int
+rte_eal_pci_map_device(struct rte_pci_device *dev)
+{
+ int ret = -1;
+
+ /* try mapping the NIC resources using VFIO if it exists */
+ switch (dev->kdrv) {
+ case RTE_KDRV_VFIO:
+#ifdef VFIO_PRESENT
+ if (pci_vfio_is_enabled())
+ ret = pci_vfio_map_resource(dev);
+#endif
+ break;
+ case RTE_KDRV_IGB_UIO:
+ case RTE_KDRV_UIO_GENERIC:
+ /* map resources for devices that use uio */
+ ret = pci_uio_map_resource(dev);
+ break;
+ default:
+ RTE_LOG(DEBUG, EAL,
+ " Not managed by a supported kernel driver, skipped\n");
+ ret = 1;
+ break;
+ }
+
+ return ret;
+}
+
+/* Unmap pci device */
+void
+rte_eal_pci_unmap_device(struct rte_pci_device *dev)
+{
+ /* try unmapping the NIC resources using VFIO if it exists */
+ switch (dev->kdrv) {
+ case RTE_KDRV_VFIO:
+ RTE_LOG(ERR, EAL, "Hotplug doesn't support vfio yet\n");
+ break;
+ case RTE_KDRV_IGB_UIO:
+ case RTE_KDRV_UIO_GENERIC:
+ /* unmap resources for devices that use uio */
+ pci_uio_unmap_resource(dev);
+ break;
+ default:
+ RTE_LOG(DEBUG, EAL,
+ " Not managed by a supported kernel driver, skipped\n");
+ break;
+ }
+}
+
+void *
+pci_find_max_end_va(void)
+{
+ const struct rte_memseg *seg = rte_eal_get_physmem_layout();
+ const struct rte_memseg *last = seg;
+ unsigned i = 0;
+
+ for (i = 0; i < RTE_MAX_MEMSEG; i++, seg++) {
+ if (seg->addr == NULL)
+ break;
+
+ if (seg->addr > last->addr)
+ last = seg;
+
+ }
+ return RTE_PTR_ADD(last->addr, last->len);
+}
+
+/* parse the "resource" sysfs file */
+static int
+pci_parse_sysfs_resource(const char *filename, struct rte_pci_device *dev)
+{
+ FILE *f;
+ char buf[BUFSIZ];
+ union pci_resource_info {
+ struct {
+ char *phys_addr;
+ char *end_addr;
+ char *flags;
+ };
+ char *ptrs[PCI_RESOURCE_FMT_NVAL];
+ } res_info;
+ int i;
+ uint64_t phys_addr, end_addr, flags;
+
+ f = fopen(filename, "r");
+ if (f == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot open sysfs resource\n");
+ return -1;
+ }
+
+ for (i = 0; i<PCI_MAX_RESOURCE; i++) {
+
+ if (fgets(buf, sizeof(buf), f) == NULL) {
+ RTE_LOG(ERR, EAL,
+ "%s(): cannot read resource\n", __func__);
+ goto error;
+ }
+
+ if (rte_strsplit(buf, sizeof(buf), res_info.ptrs, 3, ' ') != 3) {
+ RTE_LOG(ERR, EAL,
+ "%s(): bad resource format\n", __func__);
+ goto error;
+ }
+ errno = 0;
+ phys_addr = strtoull(res_info.phys_addr, NULL, 16);
+ end_addr = strtoull(res_info.end_addr, NULL, 16);
+ flags = strtoull(res_info.flags, NULL, 16);
+ if (errno != 0) {
+ RTE_LOG(ERR, EAL,
+ "%s(): bad resource format\n", __func__);
+ goto error;
+ }
+
+ if (flags & IORESOURCE_MEM) {
+ dev->mem_resource[i].phys_addr = phys_addr;
+ dev->mem_resource[i].len = end_addr - phys_addr + 1;
+ /* not mapped for now */
+ dev->mem_resource[i].addr = NULL;
+ }
+ }
+ fclose(f);
+ return 0;
+
+error:
+ fclose(f);
+ return -1;
+}
+
+/* Scan one pci sysfs entry, and fill the devices list from it. */
+static int
+pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus,
+ uint8_t devid, uint8_t function)
+{
+ char filename[PATH_MAX];
+ unsigned long tmp;
+ struct rte_pci_device *dev;
+ char driver[PATH_MAX];
+ int ret;
+
+ dev = malloc(sizeof(*dev));
+ if (dev == NULL)
+ return -1;
+
+ memset(dev, 0, sizeof(*dev));
+ dev->addr.domain = domain;
+ dev->addr.bus = bus;
+ dev->addr.devid = devid;
+ dev->addr.function = function;
+
+ /* get vendor id */
+ snprintf(filename, sizeof(filename), "%s/vendor", dirname);
+ if (eal_parse_sysfs_value(filename, &tmp) < 0) {
+ free(dev);
+ return -1;
+ }
+ dev->id.vendor_id = (uint16_t)tmp;
+
+ /* get device id */
+ snprintf(filename, sizeof(filename), "%s/device", dirname);
+ if (eal_parse_sysfs_value(filename, &tmp) < 0) {
+ free(dev);
+ return -1;
+ }
+ dev->id.device_id = (uint16_t)tmp;
+
+ /* get subsystem_vendor id */
+ snprintf(filename, sizeof(filename), "%s/subsystem_vendor",
+ dirname);
+ if (eal_parse_sysfs_value(filename, &tmp) < 0) {
+ free(dev);
+ return -1;
+ }
+ dev->id.subsystem_vendor_id = (uint16_t)tmp;
+
+ /* get subsystem_device id */
+ snprintf(filename, sizeof(filename), "%s/subsystem_device",
+ dirname);
+ if (eal_parse_sysfs_value(filename, &tmp) < 0) {
+ free(dev);
+ return -1;
+ }
+ dev->id.subsystem_device_id = (uint16_t)tmp;
+
+ /* get max_vfs */
+ dev->max_vfs = 0;
+ snprintf(filename, sizeof(filename), "%s/max_vfs", dirname);
+ if (!access(filename, F_OK) &&
+ eal_parse_sysfs_value(filename, &tmp) == 0)
+ dev->max_vfs = (uint16_t)tmp;
+ else {
+ /* for non igb_uio driver, need kernel version >= 3.8 */
+ snprintf(filename, sizeof(filename),
+ "%s/sriov_numvfs", dirname);
+ if (!access(filename, F_OK) &&
+ eal_parse_sysfs_value(filename, &tmp) == 0)
+ dev->max_vfs = (uint16_t)tmp;
+ }
+
+ /* get numa node */
+ snprintf(filename, sizeof(filename), "%s/numa_node",
+ dirname);
+ if (access(filename, R_OK) != 0) {
+ /* if no NUMA support, set default to 0 */
+ dev->numa_node = 0;
+ } else {
+ if (eal_parse_sysfs_value(filename, &tmp) < 0) {
+ free(dev);
+ return -1;
+ }
+ dev->numa_node = tmp;
+ }
+
+ /* parse resources */
+ snprintf(filename, sizeof(filename), "%s/resource", dirname);
+ if (pci_parse_sysfs_resource(filename, dev) < 0) {
+ RTE_LOG(ERR, EAL, "%s(): cannot parse resource\n", __func__);
+ free(dev);
+ return -1;
+ }
+
+ /* parse driver */
+ snprintf(filename, sizeof(filename), "%s/driver", dirname);
+ ret = pci_get_kernel_driver_by_path(filename, driver);
+ if (ret < 0) {
+ RTE_LOG(ERR, EAL, "Fail to get kernel driver\n");
+ free(dev);
+ return -1;
+ }
+
+ if (!ret) {
+ if (!strcmp(driver, "vfio-pci"))
+ dev->kdrv = RTE_KDRV_VFIO;
+ else if (!strcmp(driver, "igb_uio"))
+ dev->kdrv = RTE_KDRV_IGB_UIO;
+ else if (!strcmp(driver, "uio_pci_generic"))
+ dev->kdrv = RTE_KDRV_UIO_GENERIC;
+ else
+ dev->kdrv = RTE_KDRV_UNKNOWN;
+ } else
+ dev->kdrv = RTE_KDRV_NONE;
+
+ /* device is valid, add in list (sorted) */
+ if (TAILQ_EMPTY(&pci_device_list)) {
+ TAILQ_INSERT_TAIL(&pci_device_list, dev, next);
+ } else {
+ struct rte_pci_device *dev2;
+ int ret;
+
+ TAILQ_FOREACH(dev2, &pci_device_list, next) {
+ ret = rte_eal_compare_pci_addr(&dev->addr, &dev2->addr);
+ if (ret > 0)
+ continue;
+
+ if (ret < 0) {
+ TAILQ_INSERT_BEFORE(dev2, dev, next);
+ } else { /* already registered */
+ dev2->kdrv = dev->kdrv;
+ dev2->max_vfs = dev->max_vfs;
+ memmove(dev2->mem_resource, dev->mem_resource,
+ sizeof(dev->mem_resource));
+ free(dev);
+ }
+ return 0;
+ }
+ TAILQ_INSERT_TAIL(&pci_device_list, dev, next);
+ }
+
+ return 0;
+}
+
+/*
+ * split up a pci address into its constituent parts.
+ */
+static int
+parse_pci_addr_format(const char *buf, int bufsize, uint16_t *domain,
+ uint8_t *bus, uint8_t *devid, uint8_t *function)
+{
+ /* first split on ':' */
+ union splitaddr {
+ struct {
+ char *domain;
+ char *bus;
+ char *devid;
+ char *function;
+ };
+ char *str[PCI_FMT_NVAL]; /* last element-separator is "." not ":" */
+ } splitaddr;
+
+ char *buf_copy = strndup(buf, bufsize);
+ if (buf_copy == NULL)
+ return -1;
+
+ if (rte_strsplit(buf_copy, bufsize, splitaddr.str, PCI_FMT_NVAL, ':')
+ != PCI_FMT_NVAL - 1)
+ goto error;
+ /* final split is on '.' between devid and function */
+ splitaddr.function = strchr(splitaddr.devid,'.');
+ if (splitaddr.function == NULL)
+ goto error;
+ *splitaddr.function++ = '\0';
+
+ /* now convert to int values */
+ errno = 0;
+ *domain = (uint16_t)strtoul(splitaddr.domain, NULL, 16);
+ *bus = (uint8_t)strtoul(splitaddr.bus, NULL, 16);
+ *devid = (uint8_t)strtoul(splitaddr.devid, NULL, 16);
+ *function = (uint8_t)strtoul(splitaddr.function, NULL, 10);
+ if (errno != 0)
+ goto error;
+
+ free(buf_copy); /* free the copy made with strdup */
+ return 0;
+error:
+ free(buf_copy);
+ return -1;
+}
+
+/*
+ * Scan the content of the PCI bus, and the devices in the devices
+ * list
+ */
+int
+rte_eal_pci_scan(void)
+{
+ struct dirent *e;
+ DIR *dir;
+ char dirname[PATH_MAX];
+ uint16_t domain;
+ uint8_t bus, devid, function;
+
+ dir = opendir(SYSFS_PCI_DEVICES);
+ if (dir == NULL) {
+ RTE_LOG(ERR, EAL, "%s(): opendir failed: %s\n",
+ __func__, strerror(errno));
+ return -1;
+ }
+
+ while ((e = readdir(dir)) != NULL) {
+ if (e->d_name[0] == '.')
+ continue;
+
+ if (parse_pci_addr_format(e->d_name, sizeof(e->d_name), &domain,
+ &bus, &devid, &function) != 0)
+ continue;
+
+ snprintf(dirname, sizeof(dirname), "%s/%s", SYSFS_PCI_DEVICES,
+ e->d_name);
+ if (pci_scan_one(dirname, domain, bus, devid, function) < 0)
+ goto error;
+ }
+ closedir(dir);
+ return 0;
+
+error:
+ closedir(dir);
+ return -1;
+}
+
+#ifdef RTE_PCI_CONFIG
+/*
+ * It is deprecated, all its configurations have been moved into
+ * each PMD respectively.
+ */
+void
+pci_config_space_set(__rte_unused struct rte_pci_device *dev)
+{
+ RTE_LOG(DEBUG, EAL, "Nothing here, as it is deprecated\n");
+}
+#endif
+
+/* Read PCI config space. */
+int rte_eal_pci_read_config(const struct rte_pci_device *device,
+ void *buf, size_t len, off_t offset)
+{
+ const struct rte_intr_handle *intr_handle = &device->intr_handle;
+
+ switch (intr_handle->type) {
+ case RTE_INTR_HANDLE_UIO:
+ case RTE_INTR_HANDLE_UIO_INTX:
+ return pci_uio_read_config(intr_handle, buf, len, offset);
+
+#ifdef VFIO_PRESENT
+ case RTE_INTR_HANDLE_VFIO_MSIX:
+ case RTE_INTR_HANDLE_VFIO_MSI:
+ case RTE_INTR_HANDLE_VFIO_LEGACY:
+ return pci_vfio_read_config(intr_handle, buf, len, offset);
+#endif
+ default:
+ RTE_LOG(ERR, EAL,
+ "Unknown handle type of fd %d\n",
+ intr_handle->fd);
+ return -1;
+ }
+}
+
+/* Write PCI config space. */
+int rte_eal_pci_write_config(const struct rte_pci_device *device,
+ const void *buf, size_t len, off_t offset)
+{
+ const struct rte_intr_handle *intr_handle = &device->intr_handle;
+
+ switch (intr_handle->type) {
+ case RTE_INTR_HANDLE_UIO:
+ case RTE_INTR_HANDLE_UIO_INTX:
+ return pci_uio_write_config(intr_handle, buf, len, offset);
+
+#ifdef VFIO_PRESENT
+ case RTE_INTR_HANDLE_VFIO_MSIX:
+ case RTE_INTR_HANDLE_VFIO_MSI:
+ case RTE_INTR_HANDLE_VFIO_LEGACY:
+ return pci_vfio_write_config(intr_handle, buf, len, offset);
+#endif
+ default:
+ RTE_LOG(ERR, EAL,
+ "Unknown handle type of fd %d\n",
+ intr_handle->fd);
+ return -1;
+ }
+}
+
+#if defined(RTE_ARCH_X86)
+static int
+pci_ioport_map(struct rte_pci_device *dev, int bar __rte_unused,
+ struct rte_pci_ioport *p)
+{
+ uint16_t start, end;
+ FILE *fp;
+ char *line = NULL;
+ char pci_id[16];
+ int found = 0;
+ size_t linesz;
+
+ snprintf(pci_id, sizeof(pci_id), PCI_PRI_FMT,
+ dev->addr.domain, dev->addr.bus,
+ dev->addr.devid, dev->addr.function);
+
+ fp = fopen("/proc/ioports", "r");
+ if (fp == NULL) {
+ RTE_LOG(ERR, EAL, "%s(): can't open ioports\n", __func__);
+ return -1;
+ }
+
+ while (getdelim(&line, &linesz, '\n', fp) > 0) {
+ char *ptr = line;
+ char *left;
+ int n;
+
+ n = strcspn(ptr, ":");
+ ptr[n] = 0;
+ left = &ptr[n + 1];
+
+ while (*left && isspace(*left))
+ left++;
+
+ if (!strncmp(left, pci_id, strlen(pci_id))) {
+ found = 1;
+
+ while (*ptr && isspace(*ptr))
+ ptr++;
+
+ sscanf(ptr, "%04hx-%04hx", &start, &end);
+
+ break;
+ }
+ }
+
+ free(line);
+ fclose(fp);
+
+ if (!found)
+ return -1;
+
+ dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
+ p->base = start;
+ RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%x\n", start);
+
+ return 0;
+}
+#endif
+
+int
+rte_eal_pci_ioport_map(struct rte_pci_device *dev, int bar,
+ struct rte_pci_ioport *p)
+{
+ int ret = -1;
+
+ switch (dev->kdrv) {
+#ifdef VFIO_PRESENT
+ case RTE_KDRV_VFIO:
+ if (pci_vfio_is_enabled())
+ ret = pci_vfio_ioport_map(dev, bar, p);
+ break;
+#endif
+ case RTE_KDRV_IGB_UIO:
+ ret = pci_uio_ioport_map(dev, bar, p);
+ break;
+ case RTE_KDRV_UIO_GENERIC:
+#if defined(RTE_ARCH_X86)
+ ret = pci_ioport_map(dev, bar, p);
+#else
+ ret = pci_uio_ioport_map(dev, bar, p);
+#endif
+ break;
+ case RTE_KDRV_NONE:
+#if defined(RTE_ARCH_X86)
+ ret = pci_ioport_map(dev, bar, p);
+#endif
+ break;
+ default:
+ break;
+ }
+
+ if (!ret)
+ p->dev = dev;
+
+ return ret;
+}
+
+void
+rte_eal_pci_ioport_read(struct rte_pci_ioport *p,
+ void *data, size_t len, off_t offset)
+{
+ switch (p->dev->kdrv) {
+#ifdef VFIO_PRESENT
+ case RTE_KDRV_VFIO:
+ pci_vfio_ioport_read(p, data, len, offset);
+ break;
+#endif
+ case RTE_KDRV_IGB_UIO:
+ pci_uio_ioport_read(p, data, len, offset);
+ break;
+ case RTE_KDRV_UIO_GENERIC:
+ pci_uio_ioport_read(p, data, len, offset);
+ break;
+ case RTE_KDRV_NONE:
+#if defined(RTE_ARCH_X86)
+ pci_uio_ioport_read(p, data, len, offset);
+#endif
+ break;
+ default:
+ break;
+ }
+}
+
+void
+rte_eal_pci_ioport_write(struct rte_pci_ioport *p,
+ const void *data, size_t len, off_t offset)
+{
+ switch (p->dev->kdrv) {
+#ifdef VFIO_PRESENT
+ case RTE_KDRV_VFIO:
+ pci_vfio_ioport_write(p, data, len, offset);
+ break;
+#endif
+ case RTE_KDRV_IGB_UIO:
+ pci_uio_ioport_write(p, data, len, offset);
+ break;
+ case RTE_KDRV_UIO_GENERIC:
+ pci_uio_ioport_write(p, data, len, offset);
+ break;
+ case RTE_KDRV_NONE:
+#if defined(RTE_ARCH_X86)
+ pci_uio_ioport_write(p, data, len, offset);
+#endif
+ break;
+ default:
+ break;
+ }
+}
+
+int
+rte_eal_pci_ioport_unmap(struct rte_pci_ioport *p)
+{
+ int ret = -1;
+
+ switch (p->dev->kdrv) {
+#ifdef VFIO_PRESENT
+ case RTE_KDRV_VFIO:
+ if (pci_vfio_is_enabled())
+ ret = pci_vfio_ioport_unmap(p);
+ break;
+#endif
+ case RTE_KDRV_IGB_UIO:
+ ret = pci_uio_ioport_unmap(p);
+ break;
+ case RTE_KDRV_UIO_GENERIC:
+#if defined(RTE_ARCH_X86)
+ ret = 0;
+#else
+ ret = pci_uio_ioport_unmap(p);
+#endif
+ break;
+ case RTE_KDRV_NONE:
+#if defined(RTE_ARCH_X86)
+ ret = 0;
+#endif
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
+/* Init the PCI EAL subsystem */
+int
+rte_eal_pci_init(void)
+{
+ TAILQ_INIT(&pci_driver_list);
+ TAILQ_INIT(&pci_device_list);
+
+ /* for debug purposes, PCI can be disabled */
+ if (internal_config.no_pci)
+ return 0;
+
+ if (rte_eal_pci_scan() < 0) {
+ RTE_LOG(ERR, EAL, "%s(): Cannot scan PCI bus\n", __func__);
+ return -1;
+ }
+#ifdef VFIO_PRESENT
+ pci_vfio_enable();
+
+ if (pci_vfio_is_enabled()) {
+
+ /* if we are primary process, create a thread to communicate with
+ * secondary processes. the thread will use a socket to wait for
+ * requests from secondary process to send open file descriptors,
+ * because VFIO does not allow multiple open descriptors on a group or
+ * VFIO container.
+ */
+ if (internal_config.process_type == RTE_PROC_PRIMARY &&
+ pci_vfio_mp_sync_setup() < 0)
+ return -1;
+ }
+#endif
+ return 0;
+}
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_init.h b/lib/librte_eal/linuxapp/eal/eal_pci_init.h
new file mode 100644
index 00000000..7011753d
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_init.h
@@ -0,0 +1,127 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef EAL_PCI_INIT_H_
+#define EAL_PCI_INIT_H_
+
+#include "eal_vfio.h"
+
+/*
+ * Helper function to map PCI resources right after hugepages in virtual memory
+ */
+extern void *pci_map_addr;
+void *pci_find_max_end_va(void);
+
+int pci_uio_alloc_resource(struct rte_pci_device *dev,
+ struct mapped_pci_resource **uio_res);
+void pci_uio_free_resource(struct rte_pci_device *dev,
+ struct mapped_pci_resource *uio_res);
+int pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx,
+ struct mapped_pci_resource *uio_res, int map_idx);
+
+int pci_uio_read_config(const struct rte_intr_handle *intr_handle,
+ void *buf, size_t len, off_t offs);
+int pci_uio_write_config(const struct rte_intr_handle *intr_handle,
+ const void *buf, size_t len, off_t offs);
+
+int pci_uio_ioport_map(struct rte_pci_device *dev, int bar,
+ struct rte_pci_ioport *p);
+void pci_uio_ioport_read(struct rte_pci_ioport *p,
+ void *data, size_t len, off_t offset);
+void pci_uio_ioport_write(struct rte_pci_ioport *p,
+ const void *data, size_t len, off_t offset);
+int pci_uio_ioport_unmap(struct rte_pci_ioport *p);
+
+#ifdef VFIO_PRESENT
+
+#define VFIO_MAX_GROUPS 64
+
+int pci_vfio_enable(void);
+int pci_vfio_is_enabled(void);
+int pci_vfio_mp_sync_setup(void);
+
+/* access config space */
+int pci_vfio_read_config(const struct rte_intr_handle *intr_handle,
+ void *buf, size_t len, off_t offs);
+int pci_vfio_write_config(const struct rte_intr_handle *intr_handle,
+ const void *buf, size_t len, off_t offs);
+
+int pci_vfio_ioport_map(struct rte_pci_device *dev, int bar,
+ struct rte_pci_ioport *p);
+void pci_vfio_ioport_read(struct rte_pci_ioport *p,
+ void *data, size_t len, off_t offset);
+void pci_vfio_ioport_write(struct rte_pci_ioport *p,
+ const void *data, size_t len, off_t offset);
+int pci_vfio_ioport_unmap(struct rte_pci_ioport *p);
+
+/* map VFIO resource prototype */
+int pci_vfio_map_resource(struct rte_pci_device *dev);
+int pci_vfio_get_group_fd(int iommu_group_fd);
+int pci_vfio_get_container_fd(void);
+
+/*
+ * Function prototypes for VFIO multiprocess sync functions
+ */
+int vfio_mp_sync_send_request(int socket, int req);
+int vfio_mp_sync_receive_request(int socket);
+int vfio_mp_sync_send_fd(int socket, int fd);
+int vfio_mp_sync_receive_fd(int socket);
+int vfio_mp_sync_connect_to_primary(void);
+
+/* socket comm protocol definitions */
+#define SOCKET_REQ_CONTAINER 0x100
+#define SOCKET_REQ_GROUP 0x200
+#define SOCKET_OK 0x0
+#define SOCKET_NO_FD 0x1
+#define SOCKET_ERR 0xFF
+
+/*
+ * we don't need to store device fd's anywhere since they can be obtained from
+ * the group fd via an ioctl() call.
+ */
+struct vfio_group {
+ int group_no;
+ int fd;
+};
+
+struct vfio_config {
+ int vfio_enabled;
+ int vfio_container_fd;
+ int vfio_container_has_dma;
+ int vfio_group_idx;
+ struct vfio_group vfio_groups[VFIO_MAX_GROUPS];
+};
+
+#endif
+
+#endif /* EAL_PCI_INIT_H_ */
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
new file mode 100644
index 00000000..068694dc
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
@@ -0,0 +1,491 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <linux/pci_regs.h>
+
+#if defined(RTE_ARCH_X86)
+#include <sys/io.h>
+#endif
+
+#include <rte_log.h>
+#include <rte_pci.h>
+#include <rte_eal_memconfig.h>
+#include <rte_common.h>
+#include <rte_malloc.h>
+
+#include "eal_filesystem.h"
+#include "eal_pci_init.h"
+
+void *pci_map_addr = NULL;
+
+#define OFF_MAX ((uint64_t)(off_t)-1)
+
+int
+pci_uio_read_config(const struct rte_intr_handle *intr_handle,
+ void *buf, size_t len, off_t offset)
+{
+ return pread(intr_handle->uio_cfg_fd, buf, len, offset);
+}
+
+int
+pci_uio_write_config(const struct rte_intr_handle *intr_handle,
+ const void *buf, size_t len, off_t offset)
+{
+ return pwrite(intr_handle->uio_cfg_fd, buf, len, offset);
+}
+
+static int
+pci_uio_set_bus_master(int dev_fd)
+{
+ uint16_t reg;
+ int ret;
+
+ ret = pread(dev_fd, &reg, sizeof(reg), PCI_COMMAND);
+ if (ret != sizeof(reg)) {
+ RTE_LOG(ERR, EAL,
+ "Cannot read command from PCI config space!\n");
+ return -1;
+ }
+
+ /* return if bus mastering is already on */
+ if (reg & PCI_COMMAND_MASTER)
+ return 0;
+
+ reg |= PCI_COMMAND_MASTER;
+
+ ret = pwrite(dev_fd, &reg, sizeof(reg), PCI_COMMAND);
+ if (ret != sizeof(reg)) {
+ RTE_LOG(ERR, EAL,
+ "Cannot write command to PCI config space!\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int
+pci_mknod_uio_dev(const char *sysfs_uio_path, unsigned uio_num)
+{
+ FILE *f;
+ char filename[PATH_MAX];
+ int ret;
+ unsigned major, minor;
+ dev_t dev;
+
+ /* get the name of the sysfs file that contains the major and minor
+ * of the uio device and read its content */
+ snprintf(filename, sizeof(filename), "%s/dev", sysfs_uio_path);
+
+ f = fopen(filename, "r");
+ if (f == NULL) {
+ RTE_LOG(ERR, EAL, "%s(): cannot open sysfs to get major:minor\n",
+ __func__);
+ return -1;
+ }
+
+ ret = fscanf(f, "%u:%u", &major, &minor);
+ if (ret != 2) {
+ RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs to get major:minor\n",
+ __func__);
+ fclose(f);
+ return -1;
+ }
+ fclose(f);
+
+ /* create the char device "mknod /dev/uioX c major minor" */
+ snprintf(filename, sizeof(filename), "/dev/uio%u", uio_num);
+ dev = makedev(major, minor);
+ ret = mknod(filename, S_IFCHR | S_IRUSR | S_IWUSR, dev);
+ if (f == NULL) {
+ RTE_LOG(ERR, EAL, "%s(): mknod() failed %s\n",
+ __func__, strerror(errno));
+ return -1;
+ }
+
+ return ret;
+}
+
+/*
+ * Return the uioX char device used for a pci device. On success, return
+ * the UIO number and fill dstbuf string with the path of the device in
+ * sysfs. On error, return a negative value. In this case dstbuf is
+ * invalid.
+ */
+static int
+pci_get_uio_dev(struct rte_pci_device *dev, char *dstbuf,
+ unsigned int buflen, int create)
+{
+ struct rte_pci_addr *loc = &dev->addr;
+ unsigned int uio_num;
+ struct dirent *e;
+ DIR *dir;
+ char dirname[PATH_MAX];
+
+ /* depending on kernel version, uio can be located in uio/uioX
+ * or uio:uioX */
+
+ snprintf(dirname, sizeof(dirname),
+ SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/uio",
+ loc->domain, loc->bus, loc->devid, loc->function);
+
+ dir = opendir(dirname);
+ if (dir == NULL) {
+ /* retry with the parent directory */
+ snprintf(dirname, sizeof(dirname),
+ SYSFS_PCI_DEVICES "/" PCI_PRI_FMT,
+ loc->domain, loc->bus, loc->devid, loc->function);
+ dir = opendir(dirname);
+
+ if (dir == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot opendir %s\n", dirname);
+ return -1;
+ }
+ }
+
+ /* take the first file starting with "uio" */
+ while ((e = readdir(dir)) != NULL) {
+ /* format could be uio%d ...*/
+ int shortprefix_len = sizeof("uio") - 1;
+ /* ... or uio:uio%d */
+ int longprefix_len = sizeof("uio:uio") - 1;
+ char *endptr;
+
+ if (strncmp(e->d_name, "uio", 3) != 0)
+ continue;
+
+ /* first try uio%d */
+ errno = 0;
+ uio_num = strtoull(e->d_name + shortprefix_len, &endptr, 10);
+ if (errno == 0 && endptr != (e->d_name + shortprefix_len)) {
+ snprintf(dstbuf, buflen, "%s/uio%u", dirname, uio_num);
+ break;
+ }
+
+ /* then try uio:uio%d */
+ errno = 0;
+ uio_num = strtoull(e->d_name + longprefix_len, &endptr, 10);
+ if (errno == 0 && endptr != (e->d_name + longprefix_len)) {
+ snprintf(dstbuf, buflen, "%s/uio:uio%u", dirname, uio_num);
+ break;
+ }
+ }
+ closedir(dir);
+
+ /* No uio resource found */
+ if (e == NULL)
+ return -1;
+
+ /* create uio device if we've been asked to */
+ if (internal_config.create_uio_dev && create &&
+ pci_mknod_uio_dev(dstbuf, uio_num) < 0)
+ RTE_LOG(WARNING, EAL, "Cannot create /dev/uio%u\n", uio_num);
+
+ return uio_num;
+}
+
+void
+pci_uio_free_resource(struct rte_pci_device *dev,
+ struct mapped_pci_resource *uio_res)
+{
+ rte_free(uio_res);
+
+ if (dev->intr_handle.uio_cfg_fd >= 0) {
+ close(dev->intr_handle.uio_cfg_fd);
+ dev->intr_handle.uio_cfg_fd = -1;
+ }
+ if (dev->intr_handle.fd) {
+ close(dev->intr_handle.fd);
+ dev->intr_handle.fd = -1;
+ dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
+ }
+}
+
+int
+pci_uio_alloc_resource(struct rte_pci_device *dev,
+ struct mapped_pci_resource **uio_res)
+{
+ char dirname[PATH_MAX];
+ char cfgname[PATH_MAX];
+ char devname[PATH_MAX]; /* contains the /dev/uioX */
+ int uio_num;
+ struct rte_pci_addr *loc;
+
+ loc = &dev->addr;
+
+ /* find uio resource */
+ uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname), 1);
+ if (uio_num < 0) {
+ RTE_LOG(WARNING, EAL, " "PCI_PRI_FMT" not managed by UIO driver, "
+ "skipping\n", loc->domain, loc->bus, loc->devid, loc->function);
+ return 1;
+ }
+ snprintf(devname, sizeof(devname), "/dev/uio%u", uio_num);
+
+ /* save fd if in primary process */
+ dev->intr_handle.fd = open(devname, O_RDWR);
+ if (dev->intr_handle.fd < 0) {
+ RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
+ devname, strerror(errno));
+ goto error;
+ }
+
+ snprintf(cfgname, sizeof(cfgname),
+ "/sys/class/uio/uio%u/device/config", uio_num);
+ dev->intr_handle.uio_cfg_fd = open(cfgname, O_RDWR);
+ if (dev->intr_handle.uio_cfg_fd < 0) {
+ RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
+ cfgname, strerror(errno));
+ goto error;
+ }
+
+ if (dev->kdrv == RTE_KDRV_IGB_UIO)
+ dev->intr_handle.type = RTE_INTR_HANDLE_UIO;
+ else {
+ dev->intr_handle.type = RTE_INTR_HANDLE_UIO_INTX;
+
+ /* set bus master that is not done by uio_pci_generic */
+ if (pci_uio_set_bus_master(dev->intr_handle.uio_cfg_fd)) {
+ RTE_LOG(ERR, EAL, "Cannot set up bus mastering!\n");
+ goto error;
+ }
+ }
+
+ /* allocate the mapping details for secondary processes*/
+ *uio_res = rte_zmalloc("UIO_RES", sizeof(**uio_res), 0);
+ if (*uio_res == NULL) {
+ RTE_LOG(ERR, EAL,
+ "%s(): cannot store uio mmap details\n", __func__);
+ goto error;
+ }
+
+ snprintf((*uio_res)->path, sizeof((*uio_res)->path), "%s", devname);
+ memcpy(&(*uio_res)->pci_addr, &dev->addr, sizeof((*uio_res)->pci_addr));
+
+ return 0;
+
+error:
+ pci_uio_free_resource(dev, *uio_res);
+ return -1;
+}
+
+int
+pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx,
+ struct mapped_pci_resource *uio_res, int map_idx)
+{
+ int fd;
+ char devname[PATH_MAX]; /* contains the /dev/uioX */
+ void *mapaddr;
+ struct rte_pci_addr *loc;
+ struct pci_map *maps;
+
+ loc = &dev->addr;
+ maps = uio_res->maps;
+
+ /* update devname for mmap */
+ snprintf(devname, sizeof(devname),
+ SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/resource%d",
+ loc->domain, loc->bus, loc->devid,
+ loc->function, res_idx);
+
+ /* allocate memory to keep path */
+ maps[map_idx].path = rte_malloc(NULL, strlen(devname) + 1, 0);
+ if (maps[map_idx].path == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot allocate memory for path: %s\n",
+ strerror(errno));
+ return -1;
+ }
+
+ /*
+ * open resource file, to mmap it
+ */
+ fd = open(devname, O_RDWR);
+ if (fd < 0) {
+ RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
+ devname, strerror(errno));
+ goto error;
+ }
+
+ /* try mapping somewhere close to the end of hugepages */
+ if (pci_map_addr == NULL)
+ pci_map_addr = pci_find_max_end_va();
+
+ mapaddr = pci_map_resource(pci_map_addr, fd, 0,
+ (size_t)dev->mem_resource[res_idx].len, 0);
+ close(fd);
+ if (mapaddr == MAP_FAILED)
+ goto error;
+
+ pci_map_addr = RTE_PTR_ADD(mapaddr,
+ (size_t)dev->mem_resource[res_idx].len);
+
+ maps[map_idx].phaddr = dev->mem_resource[res_idx].phys_addr;
+ maps[map_idx].size = dev->mem_resource[res_idx].len;
+ maps[map_idx].addr = mapaddr;
+ maps[map_idx].offset = 0;
+ strcpy(maps[map_idx].path, devname);
+ dev->mem_resource[res_idx].addr = mapaddr;
+
+ return 0;
+
+error:
+ rte_free(maps[map_idx].path);
+ return -1;
+}
+
+int
+pci_uio_ioport_map(struct rte_pci_device *dev, int bar,
+ struct rte_pci_ioport *p)
+{
+#if defined(RTE_ARCH_X86)
+ char dirname[PATH_MAX];
+ char filename[PATH_MAX];
+ int uio_num;
+ unsigned long start;
+
+ uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname), 0);
+ if (uio_num < 0)
+ return -1;
+
+ /* get portio start */
+ snprintf(filename, sizeof(filename),
+ "%s/portio/port%d/start", dirname, bar);
+ if (eal_parse_sysfs_value(filename, &start) < 0) {
+ RTE_LOG(ERR, EAL, "%s(): cannot parse portio start\n",
+ __func__);
+ return -1;
+ }
+ /* ensure we don't get anything funny here, read/write will cast to
+ * uin16_t */
+ if (start > UINT16_MAX)
+ return -1;
+
+ /* FIXME only for primary process ? */
+ if (dev->intr_handle.type == RTE_INTR_HANDLE_UNKNOWN) {
+
+ snprintf(filename, sizeof(filename), "/dev/uio%u", uio_num);
+ dev->intr_handle.fd = open(filename, O_RDWR);
+ if (dev->intr_handle.fd < 0) {
+ RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
+ filename, strerror(errno));
+ return -1;
+ }
+ dev->intr_handle.type = RTE_INTR_HANDLE_UIO;
+ }
+
+ RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%lx\n", start);
+
+ p->base = start;
+ return 0;
+#else
+ RTE_SET_USED(dev);
+ RTE_SET_USED(bar);
+ RTE_SET_USED(p);
+ return -1;
+#endif
+}
+
+void
+pci_uio_ioport_read(struct rte_pci_ioport *p,
+ void *data, size_t len, off_t offset)
+{
+#if defined(RTE_ARCH_X86)
+ uint8_t *d;
+ int size;
+ unsigned short reg = p->base + offset;
+
+ for (d = data; len > 0; d += size, reg += size, len -= size) {
+ if (len >= 4) {
+ size = 4;
+ *(uint32_t *)d = inl(reg);
+ } else if (len >= 2) {
+ size = 2;
+ *(uint16_t *)d = inw(reg);
+ } else {
+ size = 1;
+ *d = inb(reg);
+ }
+ }
+#else
+ RTE_SET_USED(p);
+ RTE_SET_USED(data);
+ RTE_SET_USED(len);
+ RTE_SET_USED(offset);
+#endif
+}
+
+void
+pci_uio_ioport_write(struct rte_pci_ioport *p,
+ const void *data, size_t len, off_t offset)
+{
+#if defined(RTE_ARCH_X86)
+ const uint8_t *s;
+ int size;
+ unsigned short reg = p->base + offset;
+
+ for (s = data; len > 0; s += size, reg += size, len -= size) {
+ if (len >= 4) {
+ size = 4;
+ outl_p(*(const uint32_t *)s, reg);
+ } else if (len >= 2) {
+ size = 2;
+ outw_p(*(const uint16_t *)s, reg);
+ } else {
+ size = 1;
+ outb_p(*s, reg);
+ }
+ }
+#else
+ RTE_SET_USED(p);
+ RTE_SET_USED(data);
+ RTE_SET_USED(len);
+ RTE_SET_USED(offset);
+#endif
+}
+
+int
+pci_uio_ioport_unmap(struct rte_pci_ioport *p)
+{
+ RTE_SET_USED(p);
+#if defined(RTE_ARCH_X86)
+ /* FIXME close intr fd ? */
+ return 0;
+#else
+ return -1;
+#endif
+}
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
new file mode 100644
index 00000000..10266f8f
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
@@ -0,0 +1,1097 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+#include <fcntl.h>
+#include <linux/pci_regs.h>
+#include <sys/eventfd.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+
+#include <rte_log.h>
+#include <rte_pci.h>
+#include <rte_eal_memconfig.h>
+#include <rte_malloc.h>
+#include <eal_private.h>
+
+#include "eal_filesystem.h"
+#include "eal_pci_init.h"
+#include "eal_vfio.h"
+
+/**
+ * @file
+ * PCI probing under linux (VFIO version)
+ *
+ * This code tries to determine if the PCI device is bound to VFIO driver,
+ * and initialize it (map BARs, set up interrupts) if that's the case.
+ *
+ * This file is only compiled if CONFIG_RTE_EAL_VFIO is set to "y".
+ */
+
+#ifdef VFIO_PRESENT
+
+#define PAGE_SIZE (sysconf(_SC_PAGESIZE))
+#define PAGE_MASK (~(PAGE_SIZE - 1))
+
+static struct rte_tailq_elem rte_vfio_tailq = {
+ .name = "VFIO_RESOURCE_LIST",
+};
+EAL_REGISTER_TAILQ(rte_vfio_tailq)
+
+#define VFIO_DIR "/dev/vfio"
+#define VFIO_CONTAINER_PATH "/dev/vfio/vfio"
+#define VFIO_GROUP_FMT "/dev/vfio/%u"
+#define VFIO_NOIOMMU_GROUP_FMT "/dev/vfio/noiommu-%u"
+#define VFIO_GET_REGION_ADDR(x) ((uint64_t) x << 40ULL)
+#define VFIO_GET_REGION_IDX(x) (x >> 40)
+
+/* per-process VFIO config */
+static struct vfio_config vfio_cfg;
+
+/* DMA mapping function prototype.
+ * Takes VFIO container fd as a parameter.
+ * Returns 0 on success, -1 on error.
+ * */
+typedef int (*vfio_dma_func_t)(int);
+
+struct vfio_iommu_type {
+ int type_id;
+ const char *name;
+ vfio_dma_func_t dma_map_func;
+};
+
+static int vfio_type1_dma_map(int);
+static int vfio_noiommu_dma_map(int);
+
+/* IOMMU types we support */
+static const struct vfio_iommu_type iommu_types[] = {
+ /* x86 IOMMU, otherwise known as type 1 */
+ { RTE_VFIO_TYPE1, "Type 1", &vfio_type1_dma_map},
+ /* IOMMU-less mode */
+ { RTE_VFIO_NOIOMMU, "No-IOMMU", &vfio_noiommu_dma_map},
+};
+
+int
+vfio_type1_dma_map(int vfio_container_fd)
+{
+ const struct rte_memseg *ms = rte_eal_get_physmem_layout();
+ int i, ret;
+
+ /* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */
+ for (i = 0; i < RTE_MAX_MEMSEG; i++) {
+ struct vfio_iommu_type1_dma_map dma_map;
+
+ if (ms[i].addr == NULL)
+ break;
+
+ memset(&dma_map, 0, sizeof(dma_map));
+ dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map);
+ dma_map.vaddr = ms[i].addr_64;
+ dma_map.size = ms[i].len;
+ dma_map.iova = ms[i].phys_addr;
+ dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
+
+ ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map);
+
+ if (ret) {
+ RTE_LOG(ERR, EAL, " cannot set up DMA remapping, "
+ "error %i (%s)\n", errno, strerror(errno));
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+int
+vfio_noiommu_dma_map(int __rte_unused vfio_container_fd)
+{
+ /* No-IOMMU mode does not need DMA mapping */
+ return 0;
+}
+
+int
+pci_vfio_read_config(const struct rte_intr_handle *intr_handle,
+ void *buf, size_t len, off_t offs)
+{
+ return pread64(intr_handle->vfio_dev_fd, buf, len,
+ VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + offs);
+}
+
+int
+pci_vfio_write_config(const struct rte_intr_handle *intr_handle,
+ const void *buf, size_t len, off_t offs)
+{
+ return pwrite64(intr_handle->vfio_dev_fd, buf, len,
+ VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + offs);
+}
+
+/* get PCI BAR number where MSI-X interrupts are */
+static int
+pci_vfio_get_msix_bar(int fd, int *msix_bar, uint32_t *msix_table_offset,
+ uint32_t *msix_table_size)
+{
+ int ret;
+ uint32_t reg;
+ uint16_t flags;
+ uint8_t cap_id, cap_offset;
+
+ /* read PCI capability pointer from config space */
+ ret = pread64(fd, &reg, sizeof(reg),
+ VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
+ PCI_CAPABILITY_LIST);
+ if (ret != sizeof(reg)) {
+ RTE_LOG(ERR, EAL, "Cannot read capability pointer from PCI "
+ "config space!\n");
+ return -1;
+ }
+
+ /* we need first byte */
+ cap_offset = reg & 0xFF;
+
+ while (cap_offset) {
+
+ /* read PCI capability ID */
+ ret = pread64(fd, &reg, sizeof(reg),
+ VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
+ cap_offset);
+ if (ret != sizeof(reg)) {
+ RTE_LOG(ERR, EAL, "Cannot read capability ID from PCI "
+ "config space!\n");
+ return -1;
+ }
+
+ /* we need first byte */
+ cap_id = reg & 0xFF;
+
+ /* if we haven't reached MSI-X, check next capability */
+ if (cap_id != PCI_CAP_ID_MSIX) {
+ ret = pread64(fd, &reg, sizeof(reg),
+ VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
+ cap_offset);
+ if (ret != sizeof(reg)) {
+ RTE_LOG(ERR, EAL, "Cannot read capability pointer from PCI "
+ "config space!\n");
+ return -1;
+ }
+
+ /* we need second byte */
+ cap_offset = (reg & 0xFF00) >> 8;
+
+ continue;
+ }
+ /* else, read table offset */
+ else {
+ /* table offset resides in the next 4 bytes */
+ ret = pread64(fd, &reg, sizeof(reg),
+ VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
+ cap_offset + 4);
+ if (ret != sizeof(reg)) {
+ RTE_LOG(ERR, EAL, "Cannot read table offset from PCI config "
+ "space!\n");
+ return -1;
+ }
+
+ ret = pread64(fd, &flags, sizeof(flags),
+ VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
+ cap_offset + 2);
+ if (ret != sizeof(flags)) {
+ RTE_LOG(ERR, EAL, "Cannot read table flags from PCI config "
+ "space!\n");
+ return -1;
+ }
+
+ *msix_bar = reg & RTE_PCI_MSIX_TABLE_BIR;
+ *msix_table_offset = reg & RTE_PCI_MSIX_TABLE_OFFSET;
+ *msix_table_size = 16 * (1 + (flags & RTE_PCI_MSIX_FLAGS_QSIZE));
+
+ return 0;
+ }
+ }
+ return 0;
+}
+
+/* set PCI bus mastering */
+static int
+pci_vfio_set_bus_master(int dev_fd)
+{
+ uint16_t reg;
+ int ret;
+
+ ret = pread64(dev_fd, &reg, sizeof(reg),
+ VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
+ PCI_COMMAND);
+ if (ret != sizeof(reg)) {
+ RTE_LOG(ERR, EAL, "Cannot read command from PCI config space!\n");
+ return -1;
+ }
+
+ /* set the master bit */
+ reg |= PCI_COMMAND_MASTER;
+
+ ret = pwrite64(dev_fd, &reg, sizeof(reg),
+ VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
+ PCI_COMMAND);
+
+ if (ret != sizeof(reg)) {
+ RTE_LOG(ERR, EAL, "Cannot write command to PCI config space!\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+/* pick IOMMU type. returns a pointer to vfio_iommu_type or NULL for error */
+static const struct vfio_iommu_type *
+pci_vfio_set_iommu_type(int vfio_container_fd) {
+ unsigned idx;
+ for (idx = 0; idx < RTE_DIM(iommu_types); idx++) {
+ const struct vfio_iommu_type *t = &iommu_types[idx];
+
+ int ret = ioctl(vfio_container_fd, VFIO_SET_IOMMU,
+ t->type_id);
+ if (!ret) {
+ RTE_LOG(NOTICE, EAL, " using IOMMU type %d (%s)\n",
+ t->type_id, t->name);
+ return t;
+ }
+ /* not an error, there may be more supported IOMMU types */
+ RTE_LOG(DEBUG, EAL, " set IOMMU type %d (%s) failed, "
+ "error %i (%s)\n", t->type_id, t->name, errno,
+ strerror(errno));
+ }
+ /* if we didn't find a suitable IOMMU type, fail */
+ return NULL;
+}
+
+/* check if we have any supported extensions */
+static int
+pci_vfio_has_supported_extensions(int vfio_container_fd) {
+ int ret;
+ unsigned idx, n_extensions = 0;
+ for (idx = 0; idx < RTE_DIM(iommu_types); idx++) {
+ const struct vfio_iommu_type *t = &iommu_types[idx];
+
+ ret = ioctl(vfio_container_fd, VFIO_CHECK_EXTENSION,
+ t->type_id);
+ if (ret < 0) {
+ RTE_LOG(ERR, EAL, " could not get IOMMU type, "
+ "error %i (%s)\n", errno,
+ strerror(errno));
+ close(vfio_container_fd);
+ return -1;
+ } else if (ret == 1) {
+ /* we found a supported extension */
+ n_extensions++;
+ }
+ RTE_LOG(DEBUG, EAL, " IOMMU type %d (%s) is %s\n",
+ t->type_id, t->name,
+ ret ? "supported" : "not supported");
+ }
+
+ /* if we didn't find any supported IOMMU types, fail */
+ if (!n_extensions) {
+ close(vfio_container_fd);
+ return -1;
+ }
+
+ return 0;
+}
+
+/* set up interrupt support (but not enable interrupts) */
+static int
+pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd)
+{
+ int i, ret, intr_idx;
+
+ /* default to invalid index */
+ intr_idx = VFIO_PCI_NUM_IRQS;
+
+ /* get interrupt type from internal config (MSI-X by default, can be
+ * overriden from the command line
+ */
+ switch (internal_config.vfio_intr_mode) {
+ case RTE_INTR_MODE_MSIX:
+ intr_idx = VFIO_PCI_MSIX_IRQ_INDEX;
+ break;
+ case RTE_INTR_MODE_MSI:
+ intr_idx = VFIO_PCI_MSI_IRQ_INDEX;
+ break;
+ case RTE_INTR_MODE_LEGACY:
+ intr_idx = VFIO_PCI_INTX_IRQ_INDEX;
+ break;
+ /* don't do anything if we want to automatically determine interrupt type */
+ case RTE_INTR_MODE_NONE:
+ break;
+ default:
+ RTE_LOG(ERR, EAL, " unknown default interrupt type!\n");
+ return -1;
+ }
+
+ /* start from MSI-X interrupt type */
+ for (i = VFIO_PCI_MSIX_IRQ_INDEX; i >= 0; i--) {
+ struct vfio_irq_info irq = { .argsz = sizeof(irq) };
+ int fd = -1;
+
+ /* skip interrupt modes we don't want */
+ if (internal_config.vfio_intr_mode != RTE_INTR_MODE_NONE &&
+ i != intr_idx)
+ continue;
+
+ irq.index = i;
+
+ ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_IRQ_INFO, &irq);
+ if (ret < 0) {
+ RTE_LOG(ERR, EAL, " cannot get IRQ info, "
+ "error %i (%s)\n", errno, strerror(errno));
+ return -1;
+ }
+
+ /* if this vector cannot be used with eventfd, fail if we explicitly
+ * specified interrupt type, otherwise continue */
+ if ((irq.flags & VFIO_IRQ_INFO_EVENTFD) == 0) {
+ if (internal_config.vfio_intr_mode != RTE_INTR_MODE_NONE) {
+ RTE_LOG(ERR, EAL,
+ " interrupt vector does not support eventfd!\n");
+ return -1;
+ } else
+ continue;
+ }
+
+ /* set up an eventfd for interrupts */
+ fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+ if (fd < 0) {
+ RTE_LOG(ERR, EAL, " cannot set up eventfd, "
+ "error %i (%s)\n", errno, strerror(errno));
+ return -1;
+ }
+
+ dev->intr_handle.fd = fd;
+ dev->intr_handle.vfio_dev_fd = vfio_dev_fd;
+
+ switch (i) {
+ case VFIO_PCI_MSIX_IRQ_INDEX:
+ internal_config.vfio_intr_mode = RTE_INTR_MODE_MSIX;
+ dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSIX;
+ break;
+ case VFIO_PCI_MSI_IRQ_INDEX:
+ internal_config.vfio_intr_mode = RTE_INTR_MODE_MSI;
+ dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSI;
+ break;
+ case VFIO_PCI_INTX_IRQ_INDEX:
+ internal_config.vfio_intr_mode = RTE_INTR_MODE_LEGACY;
+ dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_LEGACY;
+ break;
+ default:
+ RTE_LOG(ERR, EAL, " unknown interrupt type!\n");
+ return -1;
+ }
+
+ return 0;
+ }
+
+ /* if we're here, we haven't found a suitable interrupt vector */
+ return -1;
+}
+
+/* open container fd or get an existing one */
+int
+pci_vfio_get_container_fd(void)
+{
+ int ret, vfio_container_fd;
+
+ /* if we're in a primary process, try to open the container */
+ if (internal_config.process_type == RTE_PROC_PRIMARY) {
+ vfio_container_fd = open(VFIO_CONTAINER_PATH, O_RDWR);
+ if (vfio_container_fd < 0) {
+ RTE_LOG(ERR, EAL, " cannot open VFIO container, "
+ "error %i (%s)\n", errno, strerror(errno));
+ return -1;
+ }
+
+ /* check VFIO API version */
+ ret = ioctl(vfio_container_fd, VFIO_GET_API_VERSION);
+ if (ret != VFIO_API_VERSION) {
+ if (ret < 0)
+ RTE_LOG(ERR, EAL, " could not get VFIO API version, "
+ "error %i (%s)\n", errno, strerror(errno));
+ else
+ RTE_LOG(ERR, EAL, " unsupported VFIO API version!\n");
+ close(vfio_container_fd);
+ return -1;
+ }
+
+ ret = pci_vfio_has_supported_extensions(vfio_container_fd);
+ if (ret) {
+ RTE_LOG(ERR, EAL, " no supported IOMMU "
+ "extensions found!\n");
+ return -1;
+ }
+
+ return vfio_container_fd;
+ } else {
+ /*
+ * if we're in a secondary process, request container fd from the
+ * primary process via our socket
+ */
+ int socket_fd;
+
+ socket_fd = vfio_mp_sync_connect_to_primary();
+ if (socket_fd < 0) {
+ RTE_LOG(ERR, EAL, " cannot connect to primary process!\n");
+ return -1;
+ }
+ if (vfio_mp_sync_send_request(socket_fd, SOCKET_REQ_CONTAINER) < 0) {
+ RTE_LOG(ERR, EAL, " cannot request container fd!\n");
+ close(socket_fd);
+ return -1;
+ }
+ vfio_container_fd = vfio_mp_sync_receive_fd(socket_fd);
+ if (vfio_container_fd < 0) {
+ RTE_LOG(ERR, EAL, " cannot get container fd!\n");
+ close(socket_fd);
+ return -1;
+ }
+ close(socket_fd);
+ return vfio_container_fd;
+ }
+
+ return -1;
+}
+
+/* open group fd or get an existing one */
+int
+pci_vfio_get_group_fd(int iommu_group_no)
+{
+ int i;
+ int vfio_group_fd;
+ char filename[PATH_MAX];
+
+ /* check if we already have the group descriptor open */
+ for (i = 0; i < vfio_cfg.vfio_group_idx; i++)
+ if (vfio_cfg.vfio_groups[i].group_no == iommu_group_no)
+ return vfio_cfg.vfio_groups[i].fd;
+
+ /* if primary, try to open the group */
+ if (internal_config.process_type == RTE_PROC_PRIMARY) {
+ /* try regular group format */
+ snprintf(filename, sizeof(filename),
+ VFIO_GROUP_FMT, iommu_group_no);
+ vfio_group_fd = open(filename, O_RDWR);
+ if (vfio_group_fd < 0) {
+ /* if file not found, it's not an error */
+ if (errno != ENOENT) {
+ RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", filename,
+ strerror(errno));
+ return -1;
+ }
+
+ /* special case: try no-IOMMU path as well */
+ snprintf(filename, sizeof(filename),
+ VFIO_NOIOMMU_GROUP_FMT, iommu_group_no);
+ vfio_group_fd = open(filename, O_RDWR);
+ if (vfio_group_fd < 0) {
+ if (errno != ENOENT) {
+ RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", filename,
+ strerror(errno));
+ return -1;
+ }
+ return 0;
+ }
+ /* noiommu group found */
+ }
+
+ /* if the fd is valid, create a new group for it */
+ if (vfio_cfg.vfio_group_idx == VFIO_MAX_GROUPS) {
+ RTE_LOG(ERR, EAL, "Maximum number of VFIO groups reached!\n");
+ close(vfio_group_fd);
+ return -1;
+ }
+ vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].group_no = iommu_group_no;
+ vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].fd = vfio_group_fd;
+ return vfio_group_fd;
+ }
+ /* if we're in a secondary process, request group fd from the primary
+ * process via our socket
+ */
+ else {
+ int socket_fd, ret;
+
+ socket_fd = vfio_mp_sync_connect_to_primary();
+
+ if (socket_fd < 0) {
+ RTE_LOG(ERR, EAL, " cannot connect to primary process!\n");
+ return -1;
+ }
+ if (vfio_mp_sync_send_request(socket_fd, SOCKET_REQ_GROUP) < 0) {
+ RTE_LOG(ERR, EAL, " cannot request container fd!\n");
+ close(socket_fd);
+ return -1;
+ }
+ if (vfio_mp_sync_send_request(socket_fd, iommu_group_no) < 0) {
+ RTE_LOG(ERR, EAL, " cannot send group number!\n");
+ close(socket_fd);
+ return -1;
+ }
+ ret = vfio_mp_sync_receive_request(socket_fd);
+ switch (ret) {
+ case SOCKET_NO_FD:
+ close(socket_fd);
+ return 0;
+ case SOCKET_OK:
+ vfio_group_fd = vfio_mp_sync_receive_fd(socket_fd);
+ /* if we got the fd, return it */
+ if (vfio_group_fd > 0) {
+ close(socket_fd);
+ return vfio_group_fd;
+ }
+ /* fall-through on error */
+ default:
+ RTE_LOG(ERR, EAL, " cannot get container fd!\n");
+ close(socket_fd);
+ return -1;
+ }
+ }
+ return -1;
+}
+
+/* parse IOMMU group number for a PCI device
+ * returns 1 on success, -1 for errors, 0 for non-existent group
+ */
+static int
+pci_vfio_get_group_no(const char *pci_addr, int *iommu_group_no)
+{
+ char linkname[PATH_MAX];
+ char filename[PATH_MAX];
+ char *tok[16], *group_tok, *end;
+ int ret;
+
+ memset(linkname, 0, sizeof(linkname));
+ memset(filename, 0, sizeof(filename));
+
+ /* try to find out IOMMU group for this device */
+ snprintf(linkname, sizeof(linkname),
+ SYSFS_PCI_DEVICES "/%s/iommu_group", pci_addr);
+
+ ret = readlink(linkname, filename, sizeof(filename));
+
+ /* if the link doesn't exist, no VFIO for us */
+ if (ret < 0)
+ return 0;
+
+ ret = rte_strsplit(filename, sizeof(filename),
+ tok, RTE_DIM(tok), '/');
+
+ if (ret <= 0) {
+ RTE_LOG(ERR, EAL, " %s cannot get IOMMU group\n", pci_addr);
+ return -1;
+ }
+
+ /* IOMMU group is always the last token */
+ errno = 0;
+ group_tok = tok[ret - 1];
+ end = group_tok;
+ *iommu_group_no = strtol(group_tok, &end, 10);
+ if ((end != group_tok && *end != '\0') || errno != 0) {
+ RTE_LOG(ERR, EAL, " %s error parsing IOMMU number!\n", pci_addr);
+ return -1;
+ }
+
+ return 1;
+}
+
+static void
+clear_current_group(void)
+{
+ vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].group_no = 0;
+ vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].fd = -1;
+}
+
+
+/*
+ * map the PCI resources of a PCI device in virtual memory (VFIO version).
+ * primary and secondary processes follow almost exactly the same path
+ */
+int
+pci_vfio_map_resource(struct rte_pci_device *dev)
+{
+ struct vfio_group_status group_status = {
+ .argsz = sizeof(group_status)
+ };
+ struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
+ int vfio_group_fd, vfio_dev_fd;
+ int iommu_group_no;
+ char pci_addr[PATH_MAX] = {0};
+ struct rte_pci_addr *loc = &dev->addr;
+ int i, ret, msix_bar;
+ struct mapped_pci_resource *vfio_res = NULL;
+ struct mapped_pci_res_list *vfio_res_list = RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list);
+
+ struct pci_map *maps;
+ uint32_t msix_table_offset = 0;
+ uint32_t msix_table_size = 0;
+ uint32_t ioport_bar;
+
+ dev->intr_handle.fd = -1;
+ dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
+
+ /* store PCI address string */
+ snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
+ loc->domain, loc->bus, loc->devid, loc->function);
+
+ /* get group number */
+ ret = pci_vfio_get_group_no(pci_addr, &iommu_group_no);
+ if (ret == 0) {
+ RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver, skipping\n",
+ pci_addr);
+ return 1;
+ }
+
+ /* if negative, something failed */
+ if (ret < 0)
+ return -1;
+
+ /* get the actual group fd */
+ vfio_group_fd = pci_vfio_get_group_fd(iommu_group_no);
+ if (vfio_group_fd < 0)
+ return -1;
+
+ /* store group fd */
+ vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].group_no = iommu_group_no;
+ vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].fd = vfio_group_fd;
+
+ /* if group_fd == 0, that means the device isn't managed by VFIO */
+ if (vfio_group_fd == 0) {
+ RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver, skipping\n",
+ pci_addr);
+ /* we store 0 as group fd to distinguish between existing but
+ * unbound VFIO groups, and groups that don't exist at all.
+ */
+ vfio_cfg.vfio_group_idx++;
+ return 1;
+ }
+
+ /*
+ * at this point, we know at least one port on this device is bound to VFIO,
+ * so we can proceed to try and set this particular port up
+ */
+
+ /* check if the group is viable */
+ ret = ioctl(vfio_group_fd, VFIO_GROUP_GET_STATUS, &group_status);
+ if (ret) {
+ RTE_LOG(ERR, EAL, " %s cannot get group status, "
+ "error %i (%s)\n", pci_addr, errno, strerror(errno));
+ close(vfio_group_fd);
+ clear_current_group();
+ return -1;
+ } else if (!(group_status.flags & VFIO_GROUP_FLAGS_VIABLE)) {
+ RTE_LOG(ERR, EAL, " %s VFIO group is not viable!\n", pci_addr);
+ close(vfio_group_fd);
+ clear_current_group();
+ return -1;
+ }
+
+ /*
+ * at this point, we know that this group is viable (meaning, all devices
+ * are either bound to VFIO or not bound to anything)
+ */
+
+ /* check if group does not have a container yet */
+ if (!(group_status.flags & VFIO_GROUP_FLAGS_CONTAINER_SET)) {
+
+ /* add group to a container */
+ ret = ioctl(vfio_group_fd, VFIO_GROUP_SET_CONTAINER,
+ &vfio_cfg.vfio_container_fd);
+ if (ret) {
+ RTE_LOG(ERR, EAL, " %s cannot add VFIO group to container, "
+ "error %i (%s)\n", pci_addr, errno, strerror(errno));
+ close(vfio_group_fd);
+ clear_current_group();
+ return -1;
+ }
+ /*
+ * at this point we know that this group has been successfully
+ * initialized, so we increment vfio_group_idx to indicate that we can
+ * add new groups.
+ */
+ vfio_cfg.vfio_group_idx++;
+ }
+
+ /*
+ * pick an IOMMU type and set up DMA mappings for container
+ *
+ * needs to be done only once, only when at least one group is assigned to
+ * a container and only in primary process
+ */
+ if (internal_config.process_type == RTE_PROC_PRIMARY &&
+ vfio_cfg.vfio_container_has_dma == 0) {
+ /* select an IOMMU type which we will be using */
+ const struct vfio_iommu_type *t =
+ pci_vfio_set_iommu_type(vfio_cfg.vfio_container_fd);
+ if (!t) {
+ RTE_LOG(ERR, EAL, " %s failed to select IOMMU type\n", pci_addr);
+ return -1;
+ }
+ ret = t->dma_map_func(vfio_cfg.vfio_container_fd);
+ if (ret) {
+ RTE_LOG(ERR, EAL, " %s DMA remapping failed, "
+ "error %i (%s)\n", pci_addr, errno, strerror(errno));
+ return -1;
+ }
+ vfio_cfg.vfio_container_has_dma = 1;
+ }
+
+ /* get a file descriptor for the device */
+ vfio_dev_fd = ioctl(vfio_group_fd, VFIO_GROUP_GET_DEVICE_FD, pci_addr);
+ if (vfio_dev_fd < 0) {
+ /* if we cannot get a device fd, this simply means that this
+ * particular port is not bound to VFIO
+ */
+ RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver, skipping\n",
+ pci_addr);
+ return 1;
+ }
+
+ /* test and setup the device */
+ ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_INFO, &device_info);
+ if (ret) {
+ RTE_LOG(ERR, EAL, " %s cannot get device info, "
+ "error %i (%s)\n", pci_addr, errno, strerror(errno));
+ close(vfio_dev_fd);
+ return -1;
+ }
+
+ /* get MSI-X BAR, if any (we have to know where it is because we can't
+ * easily mmap it when using VFIO) */
+ msix_bar = -1;
+ ret = pci_vfio_get_msix_bar(vfio_dev_fd, &msix_bar,
+ &msix_table_offset, &msix_table_size);
+ if (ret < 0) {
+ RTE_LOG(ERR, EAL, " %s cannot get MSI-X BAR number!\n", pci_addr);
+ close(vfio_dev_fd);
+ return -1;
+ }
+
+ /* if we're in a primary process, allocate vfio_res and get region info */
+ if (internal_config.process_type == RTE_PROC_PRIMARY) {
+ vfio_res = rte_zmalloc("VFIO_RES", sizeof(*vfio_res), 0);
+ if (vfio_res == NULL) {
+ RTE_LOG(ERR, EAL,
+ "%s(): cannot store uio mmap details\n", __func__);
+ close(vfio_dev_fd);
+ return -1;
+ }
+ memcpy(&vfio_res->pci_addr, &dev->addr, sizeof(vfio_res->pci_addr));
+
+ /* get number of registers (up to BAR5) */
+ vfio_res->nb_maps = RTE_MIN((int) device_info.num_regions,
+ VFIO_PCI_BAR5_REGION_INDEX + 1);
+ } else {
+ /* if we're in a secondary process, just find our tailq entry */
+ TAILQ_FOREACH(vfio_res, vfio_res_list, next) {
+ if (memcmp(&vfio_res->pci_addr, &dev->addr, sizeof(dev->addr)))
+ continue;
+ break;
+ }
+ /* if we haven't found our tailq entry, something's wrong */
+ if (vfio_res == NULL) {
+ RTE_LOG(ERR, EAL, " %s cannot find TAILQ entry for PCI device!\n",
+ pci_addr);
+ close(vfio_dev_fd);
+ return -1;
+ }
+ }
+
+ /* map BARs */
+ maps = vfio_res->maps;
+
+ for (i = 0; i < (int) vfio_res->nb_maps; i++) {
+ struct vfio_region_info reg = { .argsz = sizeof(reg) };
+ void *bar_addr;
+ struct memreg {
+ unsigned long offset, size;
+ } memreg[2] = {};
+
+ reg.index = i;
+
+ ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, &reg);
+
+ if (ret) {
+ RTE_LOG(ERR, EAL, " %s cannot get device region info "
+ "error %i (%s)\n", pci_addr, errno, strerror(errno));
+ close(vfio_dev_fd);
+ if (internal_config.process_type == RTE_PROC_PRIMARY)
+ rte_free(vfio_res);
+ return -1;
+ }
+
+ /* chk for io port region */
+ ret = pread64(vfio_dev_fd, &ioport_bar, sizeof(ioport_bar),
+ VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX)
+ + PCI_BASE_ADDRESS_0 + i*4);
+
+ if (ret != sizeof(ioport_bar)) {
+ RTE_LOG(ERR, EAL,
+ "Cannot read command (%x) from config space!\n",
+ PCI_BASE_ADDRESS_0 + i*4);
+ return -1;
+ }
+
+ if (ioport_bar & PCI_BASE_ADDRESS_SPACE_IO) {
+ RTE_LOG(INFO, EAL,
+ "Ignore mapping IO port bar(%d) addr: %x\n",
+ i, ioport_bar);
+ continue;
+ }
+
+ /* skip non-mmapable BARs */
+ if ((reg.flags & VFIO_REGION_INFO_FLAG_MMAP) == 0)
+ continue;
+
+ if (i == msix_bar) {
+ /*
+ * VFIO will not let us map the MSI-X table,
+ * but we can map around it.
+ */
+ uint32_t table_start = msix_table_offset;
+ uint32_t table_end = table_start + msix_table_size;
+ table_end = (table_end + ~PAGE_MASK) & PAGE_MASK;
+ table_start &= PAGE_MASK;
+
+ if (table_start == 0 && table_end >= reg.size) {
+ /* Cannot map this BAR */
+ RTE_LOG(DEBUG, EAL, "Skipping BAR %d\n", i);
+ continue;
+ } else {
+ memreg[0].offset = reg.offset;
+ memreg[0].size = table_start;
+ memreg[1].offset = table_end;
+ memreg[1].size = reg.size - table_end;
+
+ RTE_LOG(DEBUG, EAL,
+ "Trying to map BAR %d that contains the MSI-X "
+ "table. Trying offsets: "
+ "0x%04lx:0x%04lx, 0x%04lx:0x%04lx\n", i,
+ memreg[0].offset, memreg[0].size,
+ memreg[1].offset, memreg[1].size);
+ }
+ } else {
+ memreg[0].offset = reg.offset;
+ memreg[0].size = reg.size;
+ }
+
+ /* try to figure out an address */
+ if (internal_config.process_type == RTE_PROC_PRIMARY) {
+ /* try mapping somewhere close to the end of hugepages */
+ if (pci_map_addr == NULL)
+ pci_map_addr = pci_find_max_end_va();
+
+ bar_addr = pci_map_addr;
+ pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg.size);
+ } else {
+ bar_addr = maps[i].addr;
+ }
+
+ /* reserve the address using an inaccessible mapping */
+ bar_addr = mmap(bar_addr, reg.size, 0, MAP_PRIVATE |
+ MAP_ANONYMOUS, -1, 0);
+ if (bar_addr != MAP_FAILED) {
+ void *map_addr = NULL;
+ if (memreg[0].size) {
+ /* actual map of first part */
+ map_addr = pci_map_resource(bar_addr, vfio_dev_fd,
+ memreg[0].offset,
+ memreg[0].size,
+ MAP_FIXED);
+ }
+
+ /* if there's a second part, try to map it */
+ if (map_addr != MAP_FAILED
+ && memreg[1].offset && memreg[1].size) {
+ void *second_addr = RTE_PTR_ADD(bar_addr, memreg[1].offset);
+ map_addr = pci_map_resource(second_addr,
+ vfio_dev_fd, memreg[1].offset,
+ memreg[1].size,
+ MAP_FIXED);
+ }
+
+ if (map_addr == MAP_FAILED || !map_addr) {
+ munmap(bar_addr, reg.size);
+ bar_addr = MAP_FAILED;
+ }
+ }
+
+ if (bar_addr == MAP_FAILED ||
+ (internal_config.process_type == RTE_PROC_SECONDARY &&
+ bar_addr != maps[i].addr)) {
+ RTE_LOG(ERR, EAL, " %s mapping BAR%i failed: %s\n", pci_addr, i,
+ strerror(errno));
+ close(vfio_dev_fd);
+ if (internal_config.process_type == RTE_PROC_PRIMARY)
+ rte_free(vfio_res);
+ return -1;
+ }
+
+ maps[i].addr = bar_addr;
+ maps[i].offset = reg.offset;
+ maps[i].size = reg.size;
+ maps[i].path = NULL; /* vfio doesn't have per-resource paths */
+ dev->mem_resource[i].addr = bar_addr;
+ }
+
+ /* if secondary process, do not set up interrupts */
+ if (internal_config.process_type == RTE_PROC_PRIMARY) {
+ if (pci_vfio_setup_interrupts(dev, vfio_dev_fd) != 0) {
+ RTE_LOG(ERR, EAL, " %s error setting up interrupts!\n", pci_addr);
+ close(vfio_dev_fd);
+ rte_free(vfio_res);
+ return -1;
+ }
+
+ /* set bus mastering for the device */
+ if (pci_vfio_set_bus_master(vfio_dev_fd)) {
+ RTE_LOG(ERR, EAL, " %s cannot set up bus mastering!\n", pci_addr);
+ close(vfio_dev_fd);
+ rte_free(vfio_res);
+ return -1;
+ }
+
+ /* Reset the device */
+ ioctl(vfio_dev_fd, VFIO_DEVICE_RESET);
+ }
+
+ if (internal_config.process_type == RTE_PROC_PRIMARY)
+ TAILQ_INSERT_TAIL(vfio_res_list, vfio_res, next);
+
+ return 0;
+}
+
+int
+pci_vfio_ioport_map(struct rte_pci_device *dev, int bar,
+ struct rte_pci_ioport *p)
+{
+ if (bar < VFIO_PCI_BAR0_REGION_INDEX ||
+ bar > VFIO_PCI_BAR5_REGION_INDEX) {
+ RTE_LOG(ERR, EAL, "invalid bar (%d)!\n", bar);
+ return -1;
+ }
+
+ p->dev = dev;
+ p->base = VFIO_GET_REGION_ADDR(bar);
+ return 0;
+}
+
+void
+pci_vfio_ioport_read(struct rte_pci_ioport *p,
+ void *data, size_t len, off_t offset)
+{
+ const struct rte_intr_handle *intr_handle = &p->dev->intr_handle;
+
+ if (pread64(intr_handle->vfio_dev_fd, data,
+ len, p->base + offset) <= 0)
+ RTE_LOG(ERR, EAL,
+ "Can't read from PCI bar (%" PRIu64 ") : offset (%x)\n",
+ VFIO_GET_REGION_IDX(p->base), (int)offset);
+}
+
+void
+pci_vfio_ioport_write(struct rte_pci_ioport *p,
+ const void *data, size_t len, off_t offset)
+{
+ const struct rte_intr_handle *intr_handle = &p->dev->intr_handle;
+
+ if (pwrite64(intr_handle->vfio_dev_fd, data,
+ len, p->base + offset) <= 0)
+ RTE_LOG(ERR, EAL,
+ "Can't write to PCI bar (%" PRIu64 ") : offset (%x)\n",
+ VFIO_GET_REGION_IDX(p->base), (int)offset);
+}
+
+int
+pci_vfio_ioport_unmap(struct rte_pci_ioport *p)
+{
+ RTE_SET_USED(p);
+ return -1;
+}
+
+int
+pci_vfio_enable(void)
+{
+ /* initialize group list */
+ int i;
+ int vfio_available;
+
+ for (i = 0; i < VFIO_MAX_GROUPS; i++) {
+ vfio_cfg.vfio_groups[i].fd = -1;
+ vfio_cfg.vfio_groups[i].group_no = -1;
+ }
+
+ /* inform the user that we are probing for VFIO */
+ RTE_LOG(INFO, EAL, "Probing VFIO support...\n");
+
+ /* check if vfio-pci module is loaded */
+ vfio_available = rte_eal_check_module("vfio_pci");
+
+ /* return error directly */
+ if (vfio_available == -1) {
+ RTE_LOG(INFO, EAL, "Could not get loaded module details!\n");
+ return -1;
+ }
+
+ /* return 0 if VFIO modules not loaded */
+ if (vfio_available == 0) {
+ RTE_LOG(DEBUG, EAL, "VFIO modules not loaded, "
+ "skipping VFIO support...\n");
+ return 0;
+ }
+
+ vfio_cfg.vfio_container_fd = pci_vfio_get_container_fd();
+
+ /* check if we have VFIO driver enabled */
+ if (vfio_cfg.vfio_container_fd != -1) {
+ RTE_LOG(NOTICE, EAL, "VFIO support initialized\n");
+ vfio_cfg.vfio_enabled = 1;
+ } else {
+ RTE_LOG(NOTICE, EAL, "VFIO support could not be initialized\n");
+ }
+
+ return 0;
+}
+
+int
+pci_vfio_is_enabled(void)
+{
+ return vfio_cfg.vfio_enabled;
+}
+#endif
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio_mp_sync.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio_mp_sync.c
new file mode 100644
index 00000000..d9188fde
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio_mp_sync.c
@@ -0,0 +1,405 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <pthread.h>
+
+/* sys/un.h with __USE_MISC uses strlen, which is unsafe */
+#ifdef __USE_MISC
+#define REMOVED_USE_MISC
+#undef __USE_MISC
+#endif
+#include <sys/un.h>
+/* make sure we redefine __USE_MISC only if it was previously undefined */
+#ifdef REMOVED_USE_MISC
+#define __USE_MISC
+#undef REMOVED_USE_MISC
+#endif
+
+#include <rte_log.h>
+#include <rte_pci.h>
+#include <rte_eal_memconfig.h>
+#include <rte_malloc.h>
+
+#include "eal_filesystem.h"
+#include "eal_pci_init.h"
+#include "eal_thread.h"
+
+/**
+ * @file
+ * VFIO socket for communication between primary and secondary processes.
+ *
+ * This file is only compiled if CONFIG_RTE_EAL_VFIO is set to "y".
+ */
+
+#ifdef VFIO_PRESENT
+
+#define SOCKET_PATH_FMT "%s/.%s_mp_socket"
+#define CMSGLEN (CMSG_LEN(sizeof(int)))
+#define FD_TO_CMSGHDR(fd, chdr) \
+ do {\
+ (chdr).cmsg_len = CMSGLEN;\
+ (chdr).cmsg_level = SOL_SOCKET;\
+ (chdr).cmsg_type = SCM_RIGHTS;\
+ memcpy((chdr).__cmsg_data, &(fd), sizeof(fd));\
+ } while (0)
+#define CMSGHDR_TO_FD(chdr, fd) \
+ memcpy(&(fd), (chdr).__cmsg_data, sizeof(fd))
+
+static pthread_t socket_thread;
+static int mp_socket_fd;
+
+
+/* get socket path (/var/run if root, $HOME otherwise) */
+static void
+get_socket_path(char *buffer, int bufsz)
+{
+ const char *dir = "/var/run";
+ const char *home_dir = getenv("HOME");
+
+ if (getuid() != 0 && home_dir != NULL)
+ dir = home_dir;
+
+ /* use current prefix as file path */
+ snprintf(buffer, bufsz, SOCKET_PATH_FMT, dir,
+ internal_config.hugefile_prefix);
+}
+
+
+
+/*
+ * data flow for socket comm protocol:
+ * 1. client sends SOCKET_REQ_CONTAINER or SOCKET_REQ_GROUP
+ * 1a. in case of SOCKET_REQ_GROUP, client also then sends group number
+ * 2. server receives message
+ * 2a. in case of invalid group, SOCKET_ERR is sent back to client
+ * 2b. in case of unbound group, SOCKET_NO_FD is sent back to client
+ * 2c. in case of valid group, SOCKET_OK is sent and immediately followed by fd
+ *
+ * in case of any error, socket is closed.
+ */
+
+/* send a request, return -1 on error */
+int
+vfio_mp_sync_send_request(int socket, int req)
+{
+ struct msghdr hdr;
+ struct iovec iov;
+ int buf;
+ int ret;
+
+ memset(&hdr, 0, sizeof(hdr));
+
+ buf = req;
+
+ hdr.msg_iov = &iov;
+ hdr.msg_iovlen = 1;
+ iov.iov_base = (char *) &buf;
+ iov.iov_len = sizeof(buf);
+
+ ret = sendmsg(socket, &hdr, 0);
+ if (ret < 0)
+ return -1;
+ return 0;
+}
+
+/* receive a request and return it */
+int
+vfio_mp_sync_receive_request(int socket)
+{
+ int buf;
+ struct msghdr hdr;
+ struct iovec iov;
+ int ret, req;
+
+ memset(&hdr, 0, sizeof(hdr));
+
+ buf = SOCKET_ERR;
+
+ hdr.msg_iov = &iov;
+ hdr.msg_iovlen = 1;
+ iov.iov_base = (char *) &buf;
+ iov.iov_len = sizeof(buf);
+
+ ret = recvmsg(socket, &hdr, 0);
+ if (ret < 0)
+ return -1;
+
+ req = buf;
+
+ return req;
+}
+
+/* send OK in message, fd in control message */
+int
+vfio_mp_sync_send_fd(int socket, int fd)
+{
+ int buf;
+ struct msghdr hdr;
+ struct cmsghdr *chdr;
+ char chdr_buf[CMSGLEN];
+ struct iovec iov;
+ int ret;
+
+ chdr = (struct cmsghdr *) chdr_buf;
+ memset(chdr, 0, sizeof(chdr_buf));
+ memset(&hdr, 0, sizeof(hdr));
+
+ hdr.msg_iov = &iov;
+ hdr.msg_iovlen = 1;
+ iov.iov_base = (char *) &buf;
+ iov.iov_len = sizeof(buf);
+ hdr.msg_control = chdr;
+ hdr.msg_controllen = CMSGLEN;
+
+ buf = SOCKET_OK;
+ FD_TO_CMSGHDR(fd, *chdr);
+
+ ret = sendmsg(socket, &hdr, 0);
+ if (ret < 0)
+ return -1;
+ return 0;
+}
+
+/* receive OK in message, fd in control message */
+int
+vfio_mp_sync_receive_fd(int socket)
+{
+ int buf;
+ struct msghdr hdr;
+ struct cmsghdr *chdr;
+ char chdr_buf[CMSGLEN];
+ struct iovec iov;
+ int ret, req, fd;
+
+ buf = SOCKET_ERR;
+
+ chdr = (struct cmsghdr *) chdr_buf;
+ memset(chdr, 0, sizeof(chdr_buf));
+ memset(&hdr, 0, sizeof(hdr));
+
+ hdr.msg_iov = &iov;
+ hdr.msg_iovlen = 1;
+ iov.iov_base = (char *) &buf;
+ iov.iov_len = sizeof(buf);
+ hdr.msg_control = chdr;
+ hdr.msg_controllen = CMSGLEN;
+
+ ret = recvmsg(socket, &hdr, 0);
+ if (ret < 0)
+ return -1;
+
+ req = buf;
+
+ if (req != SOCKET_OK)
+ return -1;
+
+ CMSGHDR_TO_FD(*chdr, fd);
+
+ return fd;
+}
+
+/* connect socket_fd in secondary process to the primary process's socket */
+int
+vfio_mp_sync_connect_to_primary(void)
+{
+ struct sockaddr_un addr;
+ socklen_t sockaddr_len;
+ int socket_fd;
+
+ /* set up a socket */
+ socket_fd = socket(AF_UNIX, SOCK_SEQPACKET, 0);
+ if (socket_fd < 0) {
+ RTE_LOG(ERR, EAL, "Failed to create socket!\n");
+ return -1;
+ }
+
+ get_socket_path(addr.sun_path, sizeof(addr.sun_path));
+ addr.sun_family = AF_UNIX;
+
+ sockaddr_len = sizeof(struct sockaddr_un);
+
+ if (connect(socket_fd, (struct sockaddr *) &addr, sockaddr_len) == 0)
+ return socket_fd;
+
+ /* if connect failed */
+ close(socket_fd);
+ return -1;
+}
+
+
+
+/*
+ * socket listening thread for primary process
+ */
+static __attribute__((noreturn)) void *
+pci_vfio_mp_sync_thread(void __rte_unused * arg)
+{
+ int ret, fd, vfio_group_no;
+
+ /* wait for requests on the socket */
+ for (;;) {
+ int conn_sock;
+ struct sockaddr_un addr;
+ socklen_t sockaddr_len = sizeof(addr);
+
+ /* this is a blocking call */
+ conn_sock = accept(mp_socket_fd, (struct sockaddr *) &addr,
+ &sockaddr_len);
+
+ /* just restart on error */
+ if (conn_sock == -1)
+ continue;
+
+ /* set socket to linger after close */
+ struct linger l;
+ l.l_onoff = 1;
+ l.l_linger = 60;
+ setsockopt(conn_sock, SOL_SOCKET, SO_LINGER, &l, sizeof(l));
+
+ ret = vfio_mp_sync_receive_request(conn_sock);
+
+ switch (ret) {
+ case SOCKET_REQ_CONTAINER:
+ fd = pci_vfio_get_container_fd();
+ if (fd < 0)
+ vfio_mp_sync_send_request(conn_sock, SOCKET_ERR);
+ else
+ vfio_mp_sync_send_fd(conn_sock, fd);
+ break;
+ case SOCKET_REQ_GROUP:
+ /* wait for group number */
+ vfio_group_no = vfio_mp_sync_receive_request(conn_sock);
+ if (vfio_group_no < 0) {
+ close(conn_sock);
+ continue;
+ }
+
+ fd = pci_vfio_get_group_fd(vfio_group_no);
+
+ if (fd < 0)
+ vfio_mp_sync_send_request(conn_sock, SOCKET_ERR);
+ /* if VFIO group exists but isn't bound to VFIO driver */
+ else if (fd == 0)
+ vfio_mp_sync_send_request(conn_sock, SOCKET_NO_FD);
+ /* if group exists and is bound to VFIO driver */
+ else {
+ vfio_mp_sync_send_request(conn_sock, SOCKET_OK);
+ vfio_mp_sync_send_fd(conn_sock, fd);
+ }
+ break;
+ default:
+ vfio_mp_sync_send_request(conn_sock, SOCKET_ERR);
+ break;
+ }
+ close(conn_sock);
+ }
+}
+
+static int
+vfio_mp_sync_socket_setup(void)
+{
+ int ret, socket_fd;
+ struct sockaddr_un addr;
+ socklen_t sockaddr_len;
+
+ /* set up a socket */
+ socket_fd = socket(AF_UNIX, SOCK_SEQPACKET, 0);
+ if (socket_fd < 0) {
+ RTE_LOG(ERR, EAL, "Failed to create socket!\n");
+ return -1;
+ }
+
+ get_socket_path(addr.sun_path, sizeof(addr.sun_path));
+ addr.sun_family = AF_UNIX;
+
+ sockaddr_len = sizeof(struct sockaddr_un);
+
+ unlink(addr.sun_path);
+
+ ret = bind(socket_fd, (struct sockaddr *) &addr, sockaddr_len);
+ if (ret) {
+ RTE_LOG(ERR, EAL, "Failed to bind socket: %s!\n", strerror(errno));
+ close(socket_fd);
+ return -1;
+ }
+
+ ret = listen(socket_fd, 50);
+ if (ret) {
+ RTE_LOG(ERR, EAL, "Failed to listen: %s!\n", strerror(errno));
+ close(socket_fd);
+ return -1;
+ }
+
+ /* save the socket in local configuration */
+ mp_socket_fd = socket_fd;
+
+ return 0;
+}
+
+/*
+ * set up a local socket and tell it to listen for incoming connections
+ */
+int
+pci_vfio_mp_sync_setup(void)
+{
+ int ret;
+ char thread_name[RTE_MAX_THREAD_NAME_LEN];
+
+ if (vfio_mp_sync_socket_setup() < 0) {
+ RTE_LOG(ERR, EAL, "Failed to set up local socket!\n");
+ return -1;
+ }
+
+ ret = pthread_create(&socket_thread, NULL,
+ pci_vfio_mp_sync_thread, NULL);
+ if (ret) {
+ RTE_LOG(ERR, EAL,
+ "Failed to create thread for communication with secondary processes!\n");
+ close(mp_socket_fd);
+ return -1;
+ }
+
+ /* Set thread_name for aid in debugging. */
+ snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "pci-vfio-sync");
+ ret = rte_thread_setname(socket_thread, thread_name);
+ if (ret)
+ RTE_LOG(ERR, EAL,
+ "Failed to set thread name for secondary processes!\n");
+
+ return 0;
+}
+
+#endif
diff --git a/lib/librte_eal/linuxapp/eal/eal_thread.c b/lib/librte_eal/linuxapp/eal/eal_thread.c
new file mode 100644
index 00000000..18bd8e04
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_thread.c
@@ -0,0 +1,199 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <sched.h>
+#include <sys/queue.h>
+#include <sys/syscall.h>
+
+#include <rte_debug.h>
+#include <rte_atomic.h>
+#include <rte_launch.h>
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_per_lcore.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+
+#include "eal_private.h"
+#include "eal_thread.h"
+
+RTE_DEFINE_PER_LCORE(unsigned, _lcore_id) = LCORE_ID_ANY;
+RTE_DEFINE_PER_LCORE(unsigned, _socket_id) = (unsigned)SOCKET_ID_ANY;
+RTE_DEFINE_PER_LCORE(rte_cpuset_t, _cpuset);
+
+/*
+ * Send a message to a slave lcore identified by slave_id to call a
+ * function f with argument arg. Once the execution is done, the
+ * remote lcore switch in FINISHED state.
+ */
+int
+rte_eal_remote_launch(int (*f)(void *), void *arg, unsigned slave_id)
+{
+ int n;
+ char c = 0;
+ int m2s = lcore_config[slave_id].pipe_master2slave[1];
+ int s2m = lcore_config[slave_id].pipe_slave2master[0];
+
+ if (lcore_config[slave_id].state != WAIT)
+ return -EBUSY;
+
+ lcore_config[slave_id].f = f;
+ lcore_config[slave_id].arg = arg;
+
+ /* send message */
+ n = 0;
+ while (n == 0 || (n < 0 && errno == EINTR))
+ n = write(m2s, &c, 1);
+ if (n < 0)
+ rte_panic("cannot write on configuration pipe\n");
+
+ /* wait ack */
+ do {
+ n = read(s2m, &c, 1);
+ } while (n < 0 && errno == EINTR);
+
+ if (n <= 0)
+ rte_panic("cannot read on configuration pipe\n");
+
+ return 0;
+}
+
+/* set affinity for current EAL thread */
+static int
+eal_thread_set_affinity(void)
+{
+ unsigned lcore_id = rte_lcore_id();
+
+ /* acquire system unique id */
+ rte_gettid();
+
+ /* update EAL thread core affinity */
+ return rte_thread_set_affinity(&lcore_config[lcore_id].cpuset);
+}
+
+void eal_thread_init_master(unsigned lcore_id)
+{
+ /* set the lcore ID in per-lcore memory area */
+ RTE_PER_LCORE(_lcore_id) = lcore_id;
+
+ /* set CPU affinity */
+ if (eal_thread_set_affinity() < 0)
+ rte_panic("cannot set affinity\n");
+}
+
+/* main loop of threads */
+__attribute__((noreturn)) void *
+eal_thread_loop(__attribute__((unused)) void *arg)
+{
+ char c;
+ int n, ret;
+ unsigned lcore_id;
+ pthread_t thread_id;
+ int m2s, s2m;
+ char cpuset[RTE_CPU_AFFINITY_STR_LEN];
+
+ thread_id = pthread_self();
+
+ /* retrieve our lcore_id from the configuration structure */
+ RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+ if (thread_id == lcore_config[lcore_id].thread_id)
+ break;
+ }
+ if (lcore_id == RTE_MAX_LCORE)
+ rte_panic("cannot retrieve lcore id\n");
+
+ m2s = lcore_config[lcore_id].pipe_master2slave[0];
+ s2m = lcore_config[lcore_id].pipe_slave2master[1];
+
+ /* set the lcore ID in per-lcore memory area */
+ RTE_PER_LCORE(_lcore_id) = lcore_id;
+
+ /* set CPU affinity */
+ if (eal_thread_set_affinity() < 0)
+ rte_panic("cannot set affinity\n");
+
+ ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN);
+
+ RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%x;cpuset=[%s%s])\n",
+ lcore_id, (int)thread_id, cpuset, ret == 0 ? "" : "...");
+
+ /* read on our pipe to get commands */
+ while (1) {
+ void *fct_arg;
+
+ /* wait command */
+ do {
+ n = read(m2s, &c, 1);
+ } while (n < 0 && errno == EINTR);
+
+ if (n <= 0)
+ rte_panic("cannot read on configuration pipe\n");
+
+ lcore_config[lcore_id].state = RUNNING;
+
+ /* send ack */
+ n = 0;
+ while (n == 0 || (n < 0 && errno == EINTR))
+ n = write(s2m, &c, 1);
+ if (n < 0)
+ rte_panic("cannot write on configuration pipe\n");
+
+ if (lcore_config[lcore_id].f == NULL)
+ rte_panic("NULL function pointer\n");
+
+ /* call the function and store the return value */
+ fct_arg = lcore_config[lcore_id].arg;
+ ret = lcore_config[lcore_id].f(fct_arg);
+ lcore_config[lcore_id].ret = ret;
+ rte_wmb();
+ lcore_config[lcore_id].state = FINISHED;
+ }
+
+ /* never reached */
+ /* pthread_exit(NULL); */
+ /* return NULL; */
+}
+
+/* require calling thread tid by gettid() */
+int rte_sys_gettid(void)
+{
+ return (int)syscall(SYS_gettid);
+}
diff --git a/lib/librte_eal/linuxapp/eal/eal_timer.c b/lib/librte_eal/linuxapp/eal/eal_timer.c
new file mode 100644
index 00000000..f2abb7b6
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_timer.c
@@ -0,0 +1,305 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2012-2013 6WIND S.A.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <sys/mman.h>
+#include <sys/queue.h>
+#include <pthread.h>
+#include <errno.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_cycles.h>
+#include <rte_lcore.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_eal.h>
+#include <rte_debug.h>
+
+#include "eal_private.h"
+#include "eal_internal_cfg.h"
+
+enum timer_source eal_timer_source = EAL_TIMER_HPET;
+
+#ifdef RTE_LIBEAL_USE_HPET
+
+#define DEV_HPET "/dev/hpet"
+
+/* Maximum number of counters. */
+#define HPET_TIMER_NUM 3
+
+/* General capabilities register */
+#define CLK_PERIOD_SHIFT 32 /* Clock period shift. */
+#define CLK_PERIOD_MASK 0xffffffff00000000ULL /* Clock period mask. */
+
+/**
+ * HPET timer registers. From the Intel IA-PC HPET (High Precision Event
+ * Timers) Specification.
+ */
+struct eal_hpet_regs {
+ /* Memory-mapped, software visible registers */
+ uint64_t capabilities; /**< RO General Capabilities Register. */
+ uint64_t reserved0; /**< Reserved for future use. */
+ uint64_t config; /**< RW General Configuration Register. */
+ uint64_t reserved1; /**< Reserved for future use. */
+ uint64_t isr; /**< RW Clear General Interrupt Status. */
+ uint64_t reserved2[25]; /**< Reserved for future use. */
+ union {
+ uint64_t counter; /**< RW Main Counter Value Register. */
+ struct {
+ uint32_t counter_l; /**< RW Main Counter Low. */
+ uint32_t counter_h; /**< RW Main Counter High. */
+ };
+ };
+ uint64_t reserved3; /**< Reserved for future use. */
+ struct {
+ uint64_t config; /**< RW Timer Config and Capability Reg. */
+ uint64_t comp; /**< RW Timer Comparator Value Register. */
+ uint64_t fsb; /**< RW FSB Interrupt Route Register. */
+ uint64_t reserved4; /**< Reserved for future use. */
+ } timers[HPET_TIMER_NUM]; /**< Set of HPET timers. */
+};
+
+/* Mmap'd hpet registers */
+static volatile struct eal_hpet_regs *eal_hpet = NULL;
+
+/* Period at which the HPET counter increments in
+ * femtoseconds (10^-15 seconds). */
+static uint32_t eal_hpet_resolution_fs = 0;
+
+/* Frequency of the HPET counter in Hz */
+static uint64_t eal_hpet_resolution_hz = 0;
+
+/* Incremented 4 times during one 32bits hpet full count */
+static uint32_t eal_hpet_msb;
+
+static pthread_t msb_inc_thread_id;
+
+/*
+ * This function runs on a specific thread to update a global variable
+ * containing used to process MSB of the HPET (unfortunatelly, we need
+ * this because hpet is 32 bits by default under linux).
+ */
+static void
+hpet_msb_inc(__attribute__((unused)) void *arg)
+{
+ uint32_t t;
+
+ while (1) {
+ t = (eal_hpet->counter_l >> 30);
+ if (t != (eal_hpet_msb & 3))
+ eal_hpet_msb ++;
+ sleep(10);
+ }
+}
+
+uint64_t
+rte_get_hpet_hz(void)
+{
+ if(internal_config.no_hpet)
+ rte_panic("Error, HPET called, but no HPET present\n");
+
+ return eal_hpet_resolution_hz;
+}
+
+uint64_t
+rte_get_hpet_cycles(void)
+{
+ uint32_t t, msb;
+ uint64_t ret;
+
+ if(internal_config.no_hpet)
+ rte_panic("Error, HPET called, but no HPET present\n");
+
+ t = eal_hpet->counter_l;
+ msb = eal_hpet_msb;
+ ret = (msb + 2 - (t >> 30)) / 4;
+ ret <<= 32;
+ ret += t;
+ return ret;
+}
+
+#endif
+
+#ifdef RTE_LIBEAL_USE_HPET
+/*
+ * Open and mmap /dev/hpet (high precision event timer) that will
+ * provide our time reference.
+ */
+int
+rte_eal_hpet_init(int make_default)
+{
+ int fd, ret;
+ char thread_name[RTE_MAX_THREAD_NAME_LEN];
+
+ if (internal_config.no_hpet) {
+ RTE_LOG(NOTICE, EAL, "HPET is disabled\n");
+ return -1;
+ }
+
+ fd = open(DEV_HPET, O_RDONLY);
+ if (fd < 0) {
+ RTE_LOG(ERR, EAL, "ERROR: Cannot open "DEV_HPET": %s!\n",
+ strerror(errno));
+ internal_config.no_hpet = 1;
+ return -1;
+ }
+ eal_hpet = mmap(NULL, 1024, PROT_READ, MAP_SHARED, fd, 0);
+ if (eal_hpet == MAP_FAILED) {
+ RTE_LOG(ERR, EAL, "ERROR: Cannot mmap "DEV_HPET"!\n"
+ "Please enable CONFIG_HPET_MMAP in your kernel configuration "
+ "to allow HPET support.\n"
+ "To run without using HPET, set CONFIG_RTE_LIBEAL_USE_HPET=n "
+ "in your build configuration or use '--no-hpet' EAL flag.\n");
+ close(fd);
+ internal_config.no_hpet = 1;
+ return -1;
+ }
+ close(fd);
+
+ eal_hpet_resolution_fs = (uint32_t)((eal_hpet->capabilities &
+ CLK_PERIOD_MASK) >>
+ CLK_PERIOD_SHIFT);
+
+ eal_hpet_resolution_hz = (1000ULL*1000ULL*1000ULL*1000ULL*1000ULL) /
+ (uint64_t)eal_hpet_resolution_fs;
+
+ RTE_LOG(INFO, EAL, "HPET frequency is ~%"PRIu64" kHz\n",
+ eal_hpet_resolution_hz/1000);
+
+ eal_hpet_msb = (eal_hpet->counter_l >> 30);
+
+ /* create a thread that will increment a global variable for
+ * msb (hpet is 32 bits by default under linux) */
+ ret = pthread_create(&msb_inc_thread_id, NULL,
+ (void *(*)(void *))hpet_msb_inc, NULL);
+ if (ret != 0) {
+ RTE_LOG(ERR, EAL, "ERROR: Cannot create HPET timer thread!\n");
+ internal_config.no_hpet = 1;
+ return -1;
+ }
+
+ /*
+ * Set thread_name for aid in debugging.
+ */
+ snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "hpet-msb-inc");
+ ret = rte_thread_setname(msb_inc_thread_id, thread_name);
+ if (ret != 0)
+ RTE_LOG(ERR, EAL,
+ "ERROR: Cannot set HPET timer thread name!\n");
+
+ if (make_default)
+ eal_timer_source = EAL_TIMER_HPET;
+ return 0;
+}
+#endif
+
+static void
+check_tsc_flags(void)
+{
+ char line[512];
+ FILE *stream;
+
+ stream = fopen("/proc/cpuinfo", "r");
+ if (!stream) {
+ RTE_LOG(WARNING, EAL, "WARNING: Unable to open /proc/cpuinfo\n");
+ return;
+ }
+
+ while (fgets(line, sizeof line, stream)) {
+ char *constant_tsc;
+ char *nonstop_tsc;
+
+ if (strncmp(line, "flags", 5) != 0)
+ continue;
+
+ constant_tsc = strstr(line, "constant_tsc");
+ nonstop_tsc = strstr(line, "nonstop_tsc");
+ if (!constant_tsc || !nonstop_tsc)
+ RTE_LOG(WARNING, EAL,
+ "WARNING: cpu flags "
+ "constant_tsc=%s "
+ "nonstop_tsc=%s "
+ "-> using unreliable clock cycles !\n",
+ constant_tsc ? "yes":"no",
+ nonstop_tsc ? "yes":"no");
+ break;
+ }
+
+ fclose(stream);
+}
+
+uint64_t
+get_tsc_freq(void)
+{
+#ifdef CLOCK_MONOTONIC_RAW
+#define NS_PER_SEC 1E9
+
+ struct timespec sleeptime = {.tv_nsec = NS_PER_SEC / 10 }; /* 1/10 second */
+
+ struct timespec t_start, t_end;
+ uint64_t tsc_hz;
+
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &t_start) == 0) {
+ uint64_t ns, end, start = rte_rdtsc();
+ nanosleep(&sleeptime,NULL);
+ clock_gettime(CLOCK_MONOTONIC_RAW, &t_end);
+ end = rte_rdtsc();
+ ns = ((t_end.tv_sec - t_start.tv_sec) * NS_PER_SEC);
+ ns += (t_end.tv_nsec - t_start.tv_nsec);
+
+ double secs = (double)ns/NS_PER_SEC;
+ tsc_hz = (uint64_t)((end - start)/secs);
+ return tsc_hz;
+ }
+#endif
+ return 0;
+}
+
+int
+rte_eal_timer_init(void)
+{
+
+ eal_timer_source = EAL_TIMER_TSC;
+
+ set_tsc_freq();
+ check_tsc_flags();
+ return 0;
+}
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h b/lib/librte_eal/linuxapp/eal/eal_vfio.h
new file mode 100644
index 00000000..f483bf40
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h
@@ -0,0 +1,67 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef EAL_VFIO_H_
+#define EAL_VFIO_H_
+
+/*
+ * determine if VFIO is present on the system
+ */
+#ifdef RTE_EAL_VFIO
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0)
+#include <linux/vfio.h>
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0)
+#define RTE_PCI_MSIX_TABLE_BIR 0x7
+#define RTE_PCI_MSIX_TABLE_OFFSET 0xfffffff8
+#define RTE_PCI_MSIX_FLAGS_QSIZE 0x07ff
+#else
+#define RTE_PCI_MSIX_TABLE_BIR PCI_MSIX_TABLE_BIR
+#define RTE_PCI_MSIX_TABLE_OFFSET PCI_MSIX_TABLE_OFFSET
+#define RTE_PCI_MSIX_FLAGS_QSIZE PCI_MSIX_FLAGS_QSIZE
+#endif
+
+#define RTE_VFIO_TYPE1 VFIO_TYPE1_IOMMU
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0)
+#define RTE_VFIO_NOIOMMU 8
+#else
+#define RTE_VFIO_NOIOMMU VFIO_NOIOMMU_IOMMU
+#endif
+
+#define VFIO_PRESENT
+#endif /* kernel version */
+#endif /* RTE_EAL_VFIO */
+
+#endif /* EAL_VFIO_H_ */
diff --git a/lib/librte_eal/linuxapp/eal/eal_xen_memory.c b/lib/librte_eal/linuxapp/eal/eal_xen_memory.c
new file mode 100644
index 00000000..495eef9e
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_xen_memory.c
@@ -0,0 +1,369 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <errno.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <string.h>
+#include <stdarg.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/queue.h>
+#include <sys/file.h>
+#include <unistd.h>
+#include <limits.h>
+#include <errno.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_launch.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_common.h>
+#include <rte_string_fns.h>
+
+#include "eal_private.h"
+#include "eal_internal_cfg.h"
+#include "eal_filesystem.h"
+#include <exec-env/rte_dom0_common.h>
+
+#define PAGE_SIZE RTE_PGSIZE_4K
+#define DEFAUL_DOM0_NAME "dom0-mem"
+
+static int xen_fd = -1;
+static const char sys_dir_path[] = "/sys/kernel/mm/dom0-mm/memsize-mB";
+
+/*
+ * Try to mmap *size bytes in /dev/zero. If it is successful, return the
+ * pointer to the mmap'd area and keep *size unmodified. Else, retry
+ * with a smaller zone: decrease *size by mem_size until it reaches
+ * 0. In this case, return NULL. Note: this function returns an address
+ * which is a multiple of mem_size size.
+ */
+static void *
+xen_get_virtual_area(size_t *size, size_t mem_size)
+{
+ void *addr;
+ int fd;
+ long aligned_addr;
+
+ RTE_LOG(DEBUG, EAL, "Ask a virtual area of 0x%zu bytes\n", *size);
+
+ fd = open("/dev/zero", O_RDONLY);
+ if (fd < 0){
+ RTE_LOG(ERR, EAL, "Cannot open /dev/zero\n");
+ return NULL;
+ }
+ do {
+ addr = mmap(NULL, (*size) + mem_size, PROT_READ,
+ MAP_PRIVATE, fd, 0);
+ if (addr == MAP_FAILED)
+ *size -= mem_size;
+ } while (addr == MAP_FAILED && *size > 0);
+
+ if (addr == MAP_FAILED) {
+ close(fd);
+ RTE_LOG(ERR, EAL, "Cannot get a virtual area\n");
+ return NULL;
+ }
+
+ munmap(addr, (*size) + mem_size);
+ close(fd);
+
+ /* align addr to a mem_size boundary */
+ aligned_addr = (uintptr_t)addr;
+ aligned_addr = RTE_ALIGN_CEIL(aligned_addr, mem_size);
+ addr = (void *)(aligned_addr);
+
+ RTE_LOG(DEBUG, EAL, "Virtual area found at %p (size = 0x%zx)\n",
+ addr, *size);
+
+ return addr;
+}
+
+/**
+ * Get memory size configuration from /sys/devices/virtual/misc/dom0_mm
+ * /memsize-mB/memsize file, and the size unit is mB.
+ */
+static int
+get_xen_memory_size(void)
+{
+ char path[PATH_MAX];
+ unsigned long mem_size = 0;
+ static const char *file_name;
+
+ file_name = "memsize";
+ snprintf(path, sizeof(path), "%s/%s",
+ sys_dir_path, file_name);
+
+ if (eal_parse_sysfs_value(path, &mem_size) < 0)
+ return -1;
+
+ if (mem_size == 0)
+ rte_exit(EXIT_FAILURE,"XEN-DOM0:the %s/%s was not"
+ " configured.\n",sys_dir_path, file_name);
+ if (mem_size % 2)
+ rte_exit(EXIT_FAILURE,"XEN-DOM0:the %s/%s must be"
+ " even number.\n",sys_dir_path, file_name);
+
+ if (mem_size > DOM0_CONFIG_MEMSIZE)
+ rte_exit(EXIT_FAILURE,"XEN-DOM0:the %s/%s should not be larger"
+ " than %d mB\n",sys_dir_path, file_name, DOM0_CONFIG_MEMSIZE);
+
+ return mem_size;
+}
+
+/**
+ * Based on physical address to caculate MFN in Xen Dom0.
+ */
+phys_addr_t
+rte_xen_mem_phy2mch(uint32_t memseg_id, const phys_addr_t phy_addr)
+{
+ int mfn_id;
+ uint64_t mfn, mfn_offset;
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct rte_memseg *memseg = mcfg->memseg;
+
+ mfn_id = (phy_addr - memseg[memseg_id].phys_addr) / RTE_PGSIZE_2M;
+
+ /*the MFN is contiguous in 2M */
+ mfn_offset = (phy_addr - memseg[memseg_id].phys_addr) %
+ RTE_PGSIZE_2M / PAGE_SIZE;
+ mfn = mfn_offset + memseg[memseg_id].mfn[mfn_id];
+
+ /** return mechine address */
+ return mfn * PAGE_SIZE + phy_addr % PAGE_SIZE;
+}
+
+int
+rte_xen_dom0_memory_init(void)
+{
+ void *vir_addr, *vma_addr = NULL;
+ int err, ret = 0;
+ uint32_t i, requested, mem_size, memseg_idx, num_memseg = 0;
+ size_t vma_len = 0;
+ struct memory_info meminfo;
+ struct memseg_info seginfo[RTE_MAX_MEMSEG];
+ int flags, page_size = getpagesize();
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct rte_memseg *memseg = mcfg->memseg;
+ uint64_t total_mem = internal_config.memory;
+
+ memset(seginfo, 0, sizeof(seginfo));
+ memset(&meminfo, 0, sizeof(struct memory_info));
+
+ mem_size = get_xen_memory_size();
+ requested = (unsigned) (total_mem / 0x100000);
+ if (requested > mem_size)
+ /* if we didn't satisfy total memory requirements */
+ rte_exit(EXIT_FAILURE,"Not enough memory available! Requested: %uMB,"
+ " available: %uMB\n", requested, mem_size);
+ else if (total_mem != 0)
+ mem_size = requested;
+
+ /* Check FD and open once */
+ if (xen_fd < 0) {
+ xen_fd = open(DOM0_MM_DEV, O_RDWR);
+ if (xen_fd < 0) {
+ RTE_LOG(ERR, EAL, "Can not open %s\n",DOM0_MM_DEV);
+ return -1;
+ }
+ }
+
+ meminfo.size = mem_size;
+
+ /* construct memory mangement name for Dom0 */
+ snprintf(meminfo.name, DOM0_NAME_MAX, "%s-%s",
+ internal_config.hugefile_prefix, DEFAUL_DOM0_NAME);
+
+ /* Notify kernel driver to allocate memory */
+ ret = ioctl(xen_fd, RTE_DOM0_IOCTL_PREPARE_MEMSEG, &meminfo);
+ if (ret < 0) {
+ RTE_LOG(ERR, EAL, "XEN DOM0:failed to get memory\n");
+ err = -EIO;
+ goto fail;
+ }
+
+ /* Get number of memory segment from driver */
+ ret = ioctl(xen_fd, RTE_DOM0_IOCTL_GET_NUM_MEMSEG, &num_memseg);
+ if (ret < 0) {
+ RTE_LOG(ERR, EAL, "XEN DOM0:failed to get memseg count.\n");
+ err = -EIO;
+ goto fail;
+ }
+
+ if(num_memseg > RTE_MAX_MEMSEG){
+ RTE_LOG(ERR, EAL, "XEN DOM0: the memseg count %d is greater"
+ " than max memseg %d.\n",num_memseg, RTE_MAX_MEMSEG);
+ err = -EIO;
+ goto fail;
+ }
+
+ /* get all memory segements information */
+ ret = ioctl(xen_fd, RTE_DOM0_IOCTL_GET_MEMSEG_INFO, seginfo);
+ if (ret < 0) {
+ RTE_LOG(ERR, EAL, "XEN DOM0:failed to get memseg info.\n");
+ err = -EIO;
+ goto fail;
+ }
+
+ /* map all memory segments to contiguous user space */
+ for (memseg_idx = 0; memseg_idx < num_memseg; memseg_idx++)
+ {
+ vma_len = seginfo[memseg_idx].size;
+
+ /**
+ * get the biggest virtual memory area up to vma_len. If it fails,
+ * vma_addr is NULL, so let the kernel provide the address.
+ */
+ vma_addr = xen_get_virtual_area(&vma_len, RTE_PGSIZE_2M);
+ if (vma_addr == NULL) {
+ flags = MAP_SHARED;
+ vma_len = RTE_PGSIZE_2M;
+ } else
+ flags = MAP_SHARED | MAP_FIXED;
+
+ seginfo[memseg_idx].size = vma_len;
+ vir_addr = mmap(vma_addr, seginfo[memseg_idx].size,
+ PROT_READ|PROT_WRITE, flags, xen_fd,
+ memseg_idx * page_size);
+ if (vir_addr == MAP_FAILED) {
+ RTE_LOG(ERR, EAL, "XEN DOM0:Could not mmap %s\n",
+ DOM0_MM_DEV);
+ err = -EIO;
+ goto fail;
+ }
+
+ memseg[memseg_idx].addr = vir_addr;
+ memseg[memseg_idx].phys_addr = page_size *
+ seginfo[memseg_idx].pfn ;
+ memseg[memseg_idx].len = seginfo[memseg_idx].size;
+ for ( i = 0; i < seginfo[memseg_idx].size / RTE_PGSIZE_2M; i++)
+ memseg[memseg_idx].mfn[i] = seginfo[memseg_idx].mfn[i];
+
+ /* MFNs are continuous in 2M, so assume that page size is 2M */
+ memseg[memseg_idx].hugepage_sz = RTE_PGSIZE_2M;
+
+ memseg[memseg_idx].nchannel = mcfg->nchannel;
+ memseg[memseg_idx].nrank = mcfg->nrank;
+
+ /* NUMA is not suppoted in Xen Dom0, so only set socket 0*/
+ memseg[memseg_idx].socket_id = 0;
+ }
+
+ return 0;
+fail:
+ if (xen_fd > 0) {
+ close(xen_fd);
+ xen_fd = -1;
+ }
+ return err;
+}
+
+/*
+ * This creates the memory mappings in the secondary process to match that of
+ * the server process. It goes through each memory segment in the DPDK runtime
+ * configuration, mapping them in order to form a contiguous block in the
+ * virtual memory space
+ */
+int
+rte_xen_dom0_memory_attach(void)
+{
+ const struct rte_mem_config *mcfg;
+ unsigned s = 0; /* s used to track the segment number */
+ int xen_fd = -1;
+ int ret = -1;
+ void *vir_addr;
+ char name[DOM0_NAME_MAX] = {0};
+ int page_size = getpagesize();
+
+ mcfg = rte_eal_get_configuration()->mem_config;
+
+ /* Check FD and open once */
+ if (xen_fd < 0) {
+ xen_fd = open(DOM0_MM_DEV, O_RDWR);
+ if (xen_fd < 0) {
+ RTE_LOG(ERR, EAL, "Can not open %s\n",DOM0_MM_DEV);
+ goto error;
+ }
+ }
+
+ /* construct memory mangement name for Dom0 */
+ snprintf(name, DOM0_NAME_MAX, "%s-%s",
+ internal_config.hugefile_prefix, DEFAUL_DOM0_NAME);
+ /* attach to memory segments of primary process */
+ ret = ioctl(xen_fd, RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG, name);
+ if (ret) {
+ RTE_LOG(ERR, EAL,"attach memory segments fail.\n");
+ goto error;
+ }
+
+ /* map all segments into memory to make sure we get the addrs */
+ for (s = 0; s < RTE_MAX_MEMSEG; ++s) {
+
+ /*
+ * the first memory segment with len==0 is the one that
+ * follows the last valid segment.
+ */
+ if (mcfg->memseg[s].len == 0)
+ break;
+
+ vir_addr = mmap(mcfg->memseg[s].addr, mcfg->memseg[s].len,
+ PROT_READ|PROT_WRITE, MAP_SHARED|MAP_FIXED, xen_fd,
+ s * page_size);
+ if (vir_addr == MAP_FAILED) {
+ RTE_LOG(ERR, EAL, "Could not mmap %llu bytes "
+ "in %s to requested address [%p]\n",
+ (unsigned long long)mcfg->memseg[s].len, DOM0_MM_DEV,
+ mcfg->memseg[s].addr);
+ goto error;
+ }
+ }
+ return 0;
+
+error:
+ if (xen_fd >= 0) {
+ close(xen_fd);
+ xen_fd = -1;
+ }
+ return -1;
+}
diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_dom0_common.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_dom0_common.h
new file mode 100644
index 00000000..d9707780
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_dom0_common.h
@@ -0,0 +1,108 @@
+/*-
+ * This file is provided under a dual BSD/LGPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GNU LESSER GENERAL PUBLIC LICENSE
+ *
+ * Copyright(c) 2007-2014 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Contact Information:
+ * Intel Corporation
+ *
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef _RTE_DOM0_COMMON_H_
+#define _RTE_DOM0_COMMON_H_
+
+#ifdef __KERNEL__
+#include <linux/if.h>
+#endif
+
+#define DOM0_NAME_MAX 256
+#define DOM0_MM_DEV "/dev/dom0_mm"
+
+#define DOM0_CONTIG_NUM_ORDER 9 /**< order of 2M */
+#define DOM0_NUM_MEMSEG 512 /**< Maximum nb. of memory segment. */
+#define DOM0_MEMBLOCK_SIZE 0x200000 /**< size of memory block(2M). */
+#define DOM0_CONFIG_MEMSIZE 4096 /**< Maximum config memory size(4G). */
+#define DOM0_NUM_MEMBLOCK (DOM0_CONFIG_MEMSIZE / 2) /**< Maximum nb. of 2M memory block. */
+
+#define RTE_DOM0_IOCTL_PREPARE_MEMSEG _IOWR(0, 1 , struct memory_info)
+#define RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG _IOWR(0, 2 , char *)
+#define RTE_DOM0_IOCTL_GET_NUM_MEMSEG _IOWR(0, 3, int)
+#define RTE_DOM0_IOCTL_GET_MEMSEG_INFO _IOWR(0, 4, void *)
+
+/**
+ * A structure used to store memory information.
+ */
+struct memory_info {
+ char name[DOM0_NAME_MAX];
+ uint64_t size;
+};
+
+/**
+ * A structure used to store memory segment information.
+ */
+struct memseg_info {
+ uint32_t idx;
+ uint64_t pfn;
+ uint64_t size;
+ uint64_t mfn[DOM0_NUM_MEMBLOCK];
+};
+
+/**
+ * A structure used to store memory block information.
+ */
+struct memblock_info {
+ uint8_t exchange_flag;
+ uint8_t used;
+ uint64_t vir_addr;
+ uint64_t pfn;
+ uint64_t mfn;
+};
+#endif /* _RTE_DOM0_COMMON_H_ */
diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
new file mode 100644
index 00000000..3dacbff8
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
@@ -0,0 +1,228 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_INTERRUPTS_H_
+#error "don't include this file directly, please include generic <rte_interrupts.h>"
+#endif
+
+#ifndef _RTE_LINUXAPP_INTERRUPTS_H_
+#define _RTE_LINUXAPP_INTERRUPTS_H_
+
+#define RTE_MAX_RXTX_INTR_VEC_ID 32
+#define RTE_INTR_VEC_ZERO_OFFSET 0
+#define RTE_INTR_VEC_RXTX_OFFSET 1
+
+enum rte_intr_handle_type {
+ RTE_INTR_HANDLE_UNKNOWN = 0,
+ RTE_INTR_HANDLE_UIO, /**< uio device handle */
+ RTE_INTR_HANDLE_UIO_INTX, /**< uio generic handle */
+ RTE_INTR_HANDLE_VFIO_LEGACY, /**< vfio device handle (legacy) */
+ RTE_INTR_HANDLE_VFIO_MSI, /**< vfio device handle (MSI) */
+ RTE_INTR_HANDLE_VFIO_MSIX, /**< vfio device handle (MSIX) */
+ RTE_INTR_HANDLE_ALARM, /**< alarm handle */
+ RTE_INTR_HANDLE_EXT, /**< external handler */
+ RTE_INTR_HANDLE_MAX
+};
+
+#define RTE_INTR_EVENT_ADD 1UL
+#define RTE_INTR_EVENT_DEL 2UL
+
+typedef void (*rte_intr_event_cb_t)(int fd, void *arg);
+
+struct rte_epoll_data {
+ uint32_t event; /**< event type */
+ void *data; /**< User data */
+ rte_intr_event_cb_t cb_fun; /**< IN: callback fun */
+ void *cb_arg; /**< IN: callback arg */
+};
+
+enum {
+ RTE_EPOLL_INVALID = 0,
+ RTE_EPOLL_VALID,
+ RTE_EPOLL_EXEC,
+};
+
+/** interrupt epoll event obj, taken by epoll_event.ptr */
+struct rte_epoll_event {
+ volatile uint32_t status; /**< OUT: event status */
+ int fd; /**< OUT: event fd */
+ int epfd; /**< OUT: epoll instance the ev associated with */
+ struct rte_epoll_data epdata;
+};
+
+/** Handle for interrupts. */
+struct rte_intr_handle {
+ union {
+ int vfio_dev_fd; /**< VFIO device file descriptor */
+ int uio_cfg_fd; /**< UIO config file descriptor
+ for uio_pci_generic */
+ };
+ int fd; /**< interrupt event file descriptor */
+ enum rte_intr_handle_type type; /**< handle type */
+ uint32_t max_intr; /**< max interrupt requested */
+ uint32_t nb_efd; /**< number of available efd(event fd) */
+ int efds[RTE_MAX_RXTX_INTR_VEC_ID]; /**< intr vectors/efds mapping */
+ struct rte_epoll_event elist[RTE_MAX_RXTX_INTR_VEC_ID];
+ /**< intr vector epoll event */
+ int *intr_vec; /**< intr vector number array */
+};
+
+#define RTE_EPOLL_PER_THREAD -1 /**< to hint using per thread epfd */
+
+/**
+ * It waits for events on the epoll instance.
+ *
+ * @param epfd
+ * Epoll instance fd on which the caller wait for events.
+ * @param events
+ * Memory area contains the events that will be available for the caller.
+ * @param maxevents
+ * Up to maxevents are returned, must greater than zero.
+ * @param timeout
+ * Specifying a timeout of -1 causes a block indefinitely.
+ * Specifying a timeout equal to zero cause to return immediately.
+ * @return
+ * - On success, returns the number of available event.
+ * - On failure, a negative value.
+ */
+int
+rte_epoll_wait(int epfd, struct rte_epoll_event *events,
+ int maxevents, int timeout);
+
+/**
+ * It performs control operations on epoll instance referred by the epfd.
+ * It requests that the operation op be performed for the target fd.
+ *
+ * @param epfd
+ * Epoll instance fd on which the caller perform control operations.
+ * @param op
+ * The operation be performed for the target fd.
+ * @param fd
+ * The target fd on which the control ops perform.
+ * @param event
+ * Describes the object linked to the fd.
+ * Note: The caller must take care the object deletion after CTL_DEL.
+ * @return
+ * - On success, zero.
+ * - On failure, a negative value.
+ */
+int
+rte_epoll_ctl(int epfd, int op, int fd,
+ struct rte_epoll_event *event);
+
+/**
+ * The function returns the per thread epoll instance.
+ *
+ * @return
+ * epfd the epoll instance referred to.
+ */
+int
+rte_intr_tls_epfd(void);
+
+/**
+ * @param intr_handle
+ * Pointer to the interrupt handle.
+ * @param epfd
+ * Epoll instance fd which the intr vector associated to.
+ * @param op
+ * The operation be performed for the vector.
+ * Operation type of {ADD, DEL}.
+ * @param vec
+ * RX intr vector number added to the epoll instance wait list.
+ * @param data
+ * User raw data.
+ * @return
+ * - On success, zero.
+ * - On failure, a negative value.
+ */
+int
+rte_intr_rx_ctl(struct rte_intr_handle *intr_handle,
+ int epfd, int op, unsigned int vec, void *data);
+
+/**
+ * It enables the packet I/O interrupt event if it's necessary.
+ * It creates event fd for each interrupt vector when MSIX is used,
+ * otherwise it multiplexes a single event fd.
+ *
+ * @param intr_handle
+ * Pointer to the interrupt handle.
+ * @param nb_efd
+ * Number of interrupt vector trying to enable.
+ * The value 0 is not allowed.
+ * @return
+ * - On success, zero.
+ * - On failure, a negative value.
+ */
+int
+rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd);
+
+/**
+ * It disables the packet I/O interrupt event.
+ * It deletes registered eventfds and closes the open fds.
+ *
+ * @param intr_handle
+ * Pointer to the interrupt handle.
+ */
+void
+rte_intr_efd_disable(struct rte_intr_handle *intr_handle);
+
+/**
+ * The packet I/O interrupt on datapath is enabled or not.
+ *
+ * @param intr_handle
+ * Pointer to the interrupt handle.
+ */
+int
+rte_intr_dp_is_en(struct rte_intr_handle *intr_handle);
+
+/**
+ * The interrupt handle instance allows other causes or not.
+ * Other causes stand for any none packet I/O interrupts.
+ *
+ * @param intr_handle
+ * Pointer to the interrupt handle.
+ */
+int
+rte_intr_allow_others(struct rte_intr_handle *intr_handle);
+
+/**
+ * The multiple interrupt vector capability of interrupt handle instance.
+ * It returns zero if no multiple interrupt vector support.
+ *
+ * @param intr_handle
+ * Pointer to the interrupt handle.
+ */
+int
+rte_intr_cap_multiple(struct rte_intr_handle *intr_handle);
+
+#endif /* _RTE_LINUXAPP_INTERRUPTS_H_ */
diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
new file mode 100644
index 00000000..7e5e5984
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
@@ -0,0 +1,172 @@
+/*-
+ * This file is provided under a dual BSD/LGPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GNU LESSER GENERAL PUBLIC LICENSE
+ *
+ * Copyright(c) 2007-2014 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Contact Information:
+ * Intel Corporation
+ *
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef _RTE_KNI_COMMON_H_
+#define _RTE_KNI_COMMON_H_
+
+#ifdef __KERNEL__
+#include <linux/if.h>
+#endif
+
+/**
+ * KNI name is part of memzone name.
+ */
+#define RTE_KNI_NAMESIZE 32
+
+#define RTE_CACHE_LINE_MIN_SIZE 64
+
+/*
+ * Request id.
+ */
+enum rte_kni_req_id {
+ RTE_KNI_REQ_UNKNOWN = 0,
+ RTE_KNI_REQ_CHANGE_MTU,
+ RTE_KNI_REQ_CFG_NETWORK_IF,
+ RTE_KNI_REQ_MAX,
+};
+
+/*
+ * Structure for KNI request.
+ */
+struct rte_kni_request {
+ uint32_t req_id; /**< Request id */
+ union {
+ uint32_t new_mtu; /**< New MTU */
+ uint8_t if_up; /**< 1: interface up, 0: interface down */
+ };
+ int32_t result; /**< Result for processing request */
+} __attribute__((__packed__));
+
+/*
+ * Fifo struct mapped in a shared memory. It describes a circular buffer FIFO
+ * Write and read should wrap around. Fifo is empty when write == read
+ * Writing should never overwrite the read position
+ */
+struct rte_kni_fifo {
+ volatile unsigned write; /**< Next position to be written*/
+ volatile unsigned read; /**< Next position to be read */
+ unsigned len; /**< Circular buffer length */
+ unsigned elem_size; /**< Pointer size - for 32/64 bit OS */
+ void * volatile buffer[0]; /**< The buffer contains mbuf pointers */
+};
+
+/*
+ * The kernel image of the rte_mbuf struct, with only the relevant fields.
+ * Padding is necessary to assure the offsets of these fields
+ */
+struct rte_kni_mbuf {
+ void *buf_addr __attribute__((__aligned__(RTE_CACHE_LINE_SIZE)));
+ char pad0[10];
+ uint16_t data_off; /**< Start address of data in segment buffer. */
+ char pad1[4];
+ uint64_t ol_flags; /**< Offload features. */
+ char pad2[4];
+ uint32_t pkt_len; /**< Total pkt len: sum of all segment data_len. */
+ uint16_t data_len; /**< Amount of data in segment buffer. */
+
+ /* fields on second cache line */
+ char pad3[8] __attribute__((__aligned__(RTE_CACHE_LINE_MIN_SIZE)));
+ void *pool;
+ void *next;
+};
+
+/*
+ * Struct used to create a KNI device. Passed to the kernel in IOCTL call
+ */
+
+struct rte_kni_device_info {
+ char name[RTE_KNI_NAMESIZE]; /**< Network device name for KNI */
+
+ phys_addr_t tx_phys;
+ phys_addr_t rx_phys;
+ phys_addr_t alloc_phys;
+ phys_addr_t free_phys;
+
+ /* Used by Ethtool */
+ phys_addr_t req_phys;
+ phys_addr_t resp_phys;
+ phys_addr_t sync_phys;
+ void * sync_va;
+
+ /* mbuf mempool */
+ void * mbuf_va;
+ phys_addr_t mbuf_phys;
+
+ /* PCI info */
+ uint16_t vendor_id; /**< Vendor ID or PCI_ANY_ID. */
+ uint16_t device_id; /**< Device ID or PCI_ANY_ID. */
+ uint8_t bus; /**< Device bus */
+ uint8_t devid; /**< Device ID */
+ uint8_t function; /**< Device function. */
+
+ uint16_t group_id; /**< Group ID */
+ uint32_t core_id; /**< core ID to bind for kernel thread */
+
+ uint8_t force_bind : 1; /**< Flag for kernel thread binding */
+
+ /* mbuf size */
+ unsigned mbuf_size;
+};
+
+#define KNI_DEVICE "kni"
+
+#define RTE_KNI_IOCTL_TEST _IOWR(0, 1, int)
+#define RTE_KNI_IOCTL_CREATE _IOWR(0, 2, struct rte_kni_device_info)
+#define RTE_KNI_IOCTL_RELEASE _IOWR(0, 3, struct rte_kni_device_info)
+
+#endif /* _RTE_KNI_COMMON_H_ */
diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
new file mode 100644
index 00000000..12503efa
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
@@ -0,0 +1,156 @@
+DPDK_2.0 {
+ global:
+
+ __rte_panic;
+ devargs_list;
+ eal_parse_sysfs_value;
+ eal_timer_source;
+ lcore_config;
+ pci_device_list;
+ pci_driver_list;
+ per_lcore__lcore_id;
+ per_lcore__rte_errno;
+ rte_calloc;
+ rte_calloc_socket;
+ rte_cpu_check_supported;
+ rte_cpu_get_flag_enabled;
+ rte_cycles_vmware_tsc_map;
+ rte_delay_us;
+ rte_dump_physmem_layout;
+ rte_dump_registers;
+ rte_dump_stack;
+ rte_dump_tailq;
+ rte_eal_alarm_cancel;
+ rte_eal_alarm_set;
+ rte_eal_dev_init;
+ rte_eal_devargs_add;
+ rte_eal_devargs_dump;
+ rte_eal_devargs_type_count;
+ rte_eal_driver_register;
+ rte_eal_driver_unregister;
+ rte_eal_get_configuration;
+ rte_eal_get_lcore_state;
+ rte_eal_get_physmem_layout;
+ rte_eal_get_physmem_size;
+ rte_eal_has_hugepages;
+ rte_eal_hpet_init;
+ rte_eal_init;
+ rte_eal_iopl_init;
+ rte_eal_lcore_role;
+ rte_eal_mp_remote_launch;
+ rte_eal_mp_wait_lcore;
+ rte_eal_parse_devargs_str;
+ rte_eal_pci_dump;
+ rte_eal_pci_probe;
+ rte_eal_pci_probe_one;
+ rte_eal_pci_register;
+ rte_eal_pci_scan;
+ rte_eal_pci_unregister;
+ rte_eal_process_type;
+ rte_eal_remote_launch;
+ rte_eal_tailq_lookup;
+ rte_eal_tailq_register;
+ rte_eal_vdev_init;
+ rte_eal_vdev_uninit;
+ rte_eal_wait_lcore;
+ rte_exit;
+ rte_free;
+ rte_get_hpet_cycles;
+ rte_get_hpet_hz;
+ rte_get_log_level;
+ rte_get_log_type;
+ rte_get_tsc_hz;
+ rte_hexdump;
+ rte_intr_callback_register;
+ rte_intr_callback_unregister;
+ rte_intr_disable;
+ rte_intr_enable;
+ rte_log;
+ rte_log_add_in_history;
+ rte_log_cur_msg_loglevel;
+ rte_log_cur_msg_logtype;
+ rte_log_dump_history;
+ rte_log_set_history;
+ rte_logs;
+ rte_malloc;
+ rte_malloc_dump_stats;
+ rte_malloc_get_socket_stats;
+ rte_malloc_set_limit;
+ rte_malloc_socket;
+ rte_malloc_validate;
+ rte_malloc_virt2phy;
+ rte_mem_lock_page;
+ rte_mem_phy2mch;
+ rte_mem_virt2phy;
+ rte_memdump;
+ rte_memory_get_nchannel;
+ rte_memory_get_nrank;
+ rte_memzone_dump;
+ rte_memzone_lookup;
+ rte_memzone_reserve;
+ rte_memzone_reserve_aligned;
+ rte_memzone_reserve_bounded;
+ rte_memzone_walk;
+ rte_openlog_stream;
+ rte_realloc;
+ rte_set_application_usage_hook;
+ rte_set_log_level;
+ rte_set_log_type;
+ rte_socket_id;
+ rte_strerror;
+ rte_strsplit;
+ rte_sys_gettid;
+ rte_thread_get_affinity;
+ rte_thread_set_affinity;
+ rte_vlog;
+ rte_xen_dom0_memory_attach;
+ rte_xen_dom0_memory_init;
+ rte_zmalloc;
+ rte_zmalloc_socket;
+
+ local: *;
+};
+
+DPDK_2.1 {
+ global:
+
+ rte_eal_pci_detach;
+ rte_eal_pci_read_config;
+ rte_eal_pci_write_config;
+ rte_epoll_ctl;
+ rte_epoll_wait;
+ rte_intr_allow_others;
+ rte_intr_dp_is_en;
+ rte_intr_efd_disable;
+ rte_intr_efd_enable;
+ rte_intr_rx_ctl;
+ rte_intr_tls_epfd;
+ rte_memzone_free;
+
+} DPDK_2.0;
+
+DPDK_2.2 {
+ global:
+
+ rte_intr_cap_multiple;
+ rte_keepalive_create;
+ rte_keepalive_dispatch_pings;
+ rte_keepalive_mark_alive;
+ rte_keepalive_register_core;
+ rte_xen_dom0_supported;
+
+} DPDK_2.1;
+
+DPDK_16.04 {
+ global:
+
+ rte_cpu_get_flag_name;
+ rte_eal_pci_ioport_map;
+ rte_eal_pci_ioport_read;
+ rte_eal_pci_ioport_unmap;
+ rte_eal_pci_ioport_write;
+ rte_eal_pci_map_device;
+ rte_eal_pci_unmap_device;
+ rte_eal_primary_proc_alive;
+
+} DPDK_2.2;
diff --git a/lib/librte_eal/linuxapp/igb_uio/Makefile b/lib/librte_eal/linuxapp/igb_uio/Makefile
new file mode 100644
index 00000000..ec6e702f
--- /dev/null
+++ b/lib/librte_eal/linuxapp/igb_uio/Makefile
@@ -0,0 +1,53 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# module name and path
+#
+MODULE = igb_uio
+MODULE_PATH = drivers/net/igb_uio
+
+#
+# CFLAGS
+#
+MODULE_CFLAGS += -I$(SRCDIR) --param max-inline-insns-single=100
+MODULE_CFLAGS += -I$(RTE_OUTPUT)/include
+MODULE_CFLAGS += -Winline -Wall -Werror
+MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-y := igb_uio.c
+
+include $(RTE_SDK)/mk/rte.module.mk
diff --git a/lib/librte_eal/linuxapp/igb_uio/compat.h b/lib/librte_eal/linuxapp/igb_uio/compat.h
new file mode 100644
index 00000000..c1d45a66
--- /dev/null
+++ b/lib/librte_eal/linuxapp/igb_uio/compat.h
@@ -0,0 +1,116 @@
+/*
+ * Minimal wrappers to allow compiling igb_uio on older kernels.
+ */
+
+#ifndef RHEL_RELEASE_VERSION
+#define RHEL_RELEASE_VERSION(a, b) (((a) << 8) + (b))
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0)
+#define pci_cfg_access_lock pci_block_user_cfg_access
+#define pci_cfg_access_unlock pci_unblock_user_cfg_access
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 0)
+#define HAVE_PTE_MASK_PAGE_IOMAP
+#endif
+
+#ifndef PCI_MSIX_ENTRY_SIZE
+#define PCI_MSIX_ENTRY_SIZE 16
+#define PCI_MSIX_ENTRY_LOWER_ADDR 0
+#define PCI_MSIX_ENTRY_UPPER_ADDR 4
+#define PCI_MSIX_ENTRY_DATA 8
+#define PCI_MSIX_ENTRY_VECTOR_CTRL 12
+#define PCI_MSIX_ENTRY_CTRL_MASKBIT 1
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 34) && \
+ (!(defined(RHEL_RELEASE_CODE) && \
+ RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5, 9)))
+
+static int pci_num_vf(struct pci_dev *dev)
+{
+ struct iov {
+ int pos;
+ int nres;
+ u32 cap;
+ u16 ctrl;
+ u16 total;
+ u16 initial;
+ u16 nr_virtfn;
+ } *iov = (struct iov *)dev->sriov;
+
+ if (!dev->is_physfn)
+ return 0;
+
+ return iov->nr_virtfn;
+}
+
+#endif /* < 2.6.34 */
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39) && \
+ (!(defined(RHEL_RELEASE_CODE) && \
+ RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 4)))
+
+#define kstrtoul strict_strtoul
+
+#endif /* < 2.6.39 */
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0) && \
+ (!(defined(RHEL_RELEASE_CODE) && \
+ RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 3)))
+
+/* Check if INTX works to control irq's.
+ * Set's INTX_DISABLE flag and reads it back
+ */
+static bool pci_intx_mask_supported(struct pci_dev *pdev)
+{
+ bool mask_supported = false;
+ uint16_t orig, new;
+
+ pci_block_user_cfg_access(pdev);
+ pci_read_config_word(pdev, PCI_COMMAND, &orig);
+ pci_write_config_word(pdev, PCI_COMMAND,
+ orig ^ PCI_COMMAND_INTX_DISABLE);
+ pci_read_config_word(pdev, PCI_COMMAND, &new);
+
+ if ((new ^ orig) & ~PCI_COMMAND_INTX_DISABLE) {
+ dev_err(&pdev->dev, "Command register changed from "
+ "0x%x to 0x%x: driver or hardware bug?\n", orig, new);
+ } else if ((new ^ orig) & PCI_COMMAND_INTX_DISABLE) {
+ mask_supported = true;
+ pci_write_config_word(pdev, PCI_COMMAND, orig);
+ }
+ pci_unblock_user_cfg_access(pdev);
+
+ return mask_supported;
+}
+
+static bool pci_check_and_mask_intx(struct pci_dev *pdev)
+{
+ bool pending;
+ uint32_t status;
+
+ pci_block_user_cfg_access(pdev);
+ pci_read_config_dword(pdev, PCI_COMMAND, &status);
+
+ /* interrupt is not ours, goes to out */
+ pending = (((status >> 16) & PCI_STATUS_INTERRUPT) != 0);
+ if (pending) {
+ uint16_t old, new;
+
+ old = status;
+ if (status != 0)
+ new = old & (~PCI_COMMAND_INTX_DISABLE);
+ else
+ new = old | PCI_COMMAND_INTX_DISABLE;
+
+ if (old != new)
+ pci_write_config_word(pdev, PCI_COMMAND, new);
+ }
+ pci_unblock_user_cfg_access(pdev);
+
+ return pending;
+}
+
+#endif /* < 3.3.0 */
diff --git a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c
new file mode 100644
index 00000000..72b26923
--- /dev/null
+++ b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c
@@ -0,0 +1,580 @@
+/*-
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ * The full GNU General Public License is included in this distribution
+ * in the file called LICENSE.GPL.
+ *
+ * Contact Information:
+ * Intel Corporation
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/uio_driver.h>
+#include <linux/io.h>
+#include <linux/msi.h>
+#include <linux/version.h>
+#include <linux/slab.h>
+
+#ifdef CONFIG_XEN_DOM0
+#include <xen/xen.h>
+#endif
+#include <rte_pci_dev_features.h>
+
+#include "compat.h"
+
+/**
+ * A structure describing the private information for a uio device.
+ */
+struct rte_uio_pci_dev {
+ struct uio_info info;
+ struct pci_dev *pdev;
+ enum rte_intr_mode mode;
+};
+
+static char *intr_mode;
+static enum rte_intr_mode igbuio_intr_mode_preferred = RTE_INTR_MODE_MSIX;
+
+/* sriov sysfs */
+static ssize_t
+show_max_vfs(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ return snprintf(buf, 10, "%u\n", dev_num_vf(dev));
+}
+
+static ssize_t
+store_max_vfs(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ int err = 0;
+ unsigned long max_vfs;
+ struct pci_dev *pdev = to_pci_dev(dev);
+
+ if (0 != kstrtoul(buf, 0, &max_vfs))
+ return -EINVAL;
+
+ if (0 == max_vfs)
+ pci_disable_sriov(pdev);
+ else if (0 == pci_num_vf(pdev))
+ err = pci_enable_sriov(pdev, max_vfs);
+ else /* do nothing if change max_vfs number */
+ err = -EINVAL;
+
+ return err ? err : count;
+}
+
+#ifdef RTE_PCI_CONFIG
+static ssize_t
+show_extended_tag(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ dev_info(dev, "Deprecated\n");
+
+ return 0;
+}
+
+static ssize_t
+store_extended_tag(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ dev_info(dev, "Deprecated\n");
+
+ return 0;
+}
+
+static ssize_t
+show_max_read_request_size(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ dev_info(dev, "Deprecated\n");
+
+ return 0;
+}
+
+static ssize_t
+store_max_read_request_size(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ dev_info(dev, "Deprecated\n");
+
+ return 0;
+}
+#endif
+
+static DEVICE_ATTR(max_vfs, S_IRUGO | S_IWUSR, show_max_vfs, store_max_vfs);
+#ifdef RTE_PCI_CONFIG
+static DEVICE_ATTR(extended_tag, S_IRUGO | S_IWUSR, show_extended_tag,
+ store_extended_tag);
+static DEVICE_ATTR(max_read_request_size, S_IRUGO | S_IWUSR,
+ show_max_read_request_size, store_max_read_request_size);
+#endif
+
+static struct attribute *dev_attrs[] = {
+ &dev_attr_max_vfs.attr,
+#ifdef RTE_PCI_CONFIG
+ &dev_attr_extended_tag.attr,
+ &dev_attr_max_read_request_size.attr,
+#endif
+ NULL,
+};
+
+static const struct attribute_group dev_attr_grp = {
+ .attrs = dev_attrs,
+};
+/*
+ * It masks the msix on/off of generating MSI-X messages.
+ */
+static void
+igbuio_msix_mask_irq(struct msi_desc *desc, int32_t state)
+{
+ u32 mask_bits = desc->masked;
+ unsigned offset = desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
+ PCI_MSIX_ENTRY_VECTOR_CTRL;
+
+ if (state != 0)
+ mask_bits &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
+ else
+ mask_bits |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
+
+ if (mask_bits != desc->masked) {
+ writel(mask_bits, desc->mask_base + offset);
+ readl(desc->mask_base);
+ desc->masked = mask_bits;
+ }
+}
+
+/**
+ * This is the irqcontrol callback to be registered to uio_info.
+ * It can be used to disable/enable interrupt from user space processes.
+ *
+ * @param info
+ * pointer to uio_info.
+ * @param irq_state
+ * state value. 1 to enable interrupt, 0 to disable interrupt.
+ *
+ * @return
+ * - On success, 0.
+ * - On failure, a negative value.
+ */
+static int
+igbuio_pci_irqcontrol(struct uio_info *info, s32 irq_state)
+{
+ struct rte_uio_pci_dev *udev = info->priv;
+ struct pci_dev *pdev = udev->pdev;
+
+ pci_cfg_access_lock(pdev);
+ if (udev->mode == RTE_INTR_MODE_LEGACY)
+ pci_intx(pdev, !!irq_state);
+
+ else if (udev->mode == RTE_INTR_MODE_MSIX) {
+ struct msi_desc *desc;
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0))
+ list_for_each_entry(desc, &pdev->msi_list, list)
+ igbuio_msix_mask_irq(desc, irq_state);
+#else
+ list_for_each_entry(desc, &pdev->dev.msi_list, list)
+ igbuio_msix_mask_irq(desc, irq_state);
+#endif
+ }
+ pci_cfg_access_unlock(pdev);
+
+ return 0;
+}
+
+/**
+ * This is interrupt handler which will check if the interrupt is for the right device.
+ * If yes, disable it here and will be enable later.
+ */
+static irqreturn_t
+igbuio_pci_irqhandler(int irq, struct uio_info *info)
+{
+ struct rte_uio_pci_dev *udev = info->priv;
+
+ /* Legacy mode need to mask in hardware */
+ if (udev->mode == RTE_INTR_MODE_LEGACY &&
+ !pci_check_and_mask_intx(udev->pdev))
+ return IRQ_NONE;
+
+ /* Message signal mode, no share IRQ and automasked */
+ return IRQ_HANDLED;
+}
+
+#ifdef CONFIG_XEN_DOM0
+static int
+igbuio_dom0_mmap_phys(struct uio_info *info, struct vm_area_struct *vma)
+{
+ int idx;
+
+ idx = (int)vma->vm_pgoff;
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+#ifdef HAVE_PTE_MASK_PAGE_IOMAP
+ vma->vm_page_prot.pgprot |= _PAGE_IOMAP;
+#endif
+
+ return remap_pfn_range(vma,
+ vma->vm_start,
+ info->mem[idx].addr >> PAGE_SHIFT,
+ vma->vm_end - vma->vm_start,
+ vma->vm_page_prot);
+}
+
+/**
+ * This is uio device mmap method which will use igbuio mmap for Xen
+ * Dom0 environment.
+ */
+static int
+igbuio_dom0_pci_mmap(struct uio_info *info, struct vm_area_struct *vma)
+{
+ int idx;
+
+ if (vma->vm_pgoff >= MAX_UIO_MAPS)
+ return -EINVAL;
+
+ if (info->mem[vma->vm_pgoff].size == 0)
+ return -EINVAL;
+
+ idx = (int)vma->vm_pgoff;
+ switch (info->mem[idx].memtype) {
+ case UIO_MEM_PHYS:
+ return igbuio_dom0_mmap_phys(info, vma);
+ case UIO_MEM_LOGICAL:
+ case UIO_MEM_VIRTUAL:
+ default:
+ return -EINVAL;
+ }
+}
+#endif
+
+/* Remap pci resources described by bar #pci_bar in uio resource n. */
+static int
+igbuio_pci_setup_iomem(struct pci_dev *dev, struct uio_info *info,
+ int n, int pci_bar, const char *name)
+{
+ unsigned long addr, len;
+ void *internal_addr;
+
+ if (n >= ARRAY_SIZE(info->mem))
+ return -EINVAL;
+
+ addr = pci_resource_start(dev, pci_bar);
+ len = pci_resource_len(dev, pci_bar);
+ if (addr == 0 || len == 0)
+ return -1;
+ internal_addr = ioremap(addr, len);
+ if (internal_addr == NULL)
+ return -1;
+ info->mem[n].name = name;
+ info->mem[n].addr = addr;
+ info->mem[n].internal_addr = internal_addr;
+ info->mem[n].size = len;
+ info->mem[n].memtype = UIO_MEM_PHYS;
+ return 0;
+}
+
+/* Get pci port io resources described by bar #pci_bar in uio resource n. */
+static int
+igbuio_pci_setup_ioport(struct pci_dev *dev, struct uio_info *info,
+ int n, int pci_bar, const char *name)
+{
+ unsigned long addr, len;
+
+ if (n >= ARRAY_SIZE(info->port))
+ return -EINVAL;
+
+ addr = pci_resource_start(dev, pci_bar);
+ len = pci_resource_len(dev, pci_bar);
+ if (addr == 0 || len == 0)
+ return -EINVAL;
+
+ info->port[n].name = name;
+ info->port[n].start = addr;
+ info->port[n].size = len;
+ info->port[n].porttype = UIO_PORT_X86;
+
+ return 0;
+}
+
+/* Unmap previously ioremap'd resources */
+static void
+igbuio_pci_release_iomem(struct uio_info *info)
+{
+ int i;
+
+ for (i = 0; i < MAX_UIO_MAPS; i++) {
+ if (info->mem[i].internal_addr)
+ iounmap(info->mem[i].internal_addr);
+ }
+}
+
+static int
+igbuio_setup_bars(struct pci_dev *dev, struct uio_info *info)
+{
+ int i, iom, iop, ret;
+ unsigned long flags;
+ static const char *bar_names[PCI_STD_RESOURCE_END + 1] = {
+ "BAR0",
+ "BAR1",
+ "BAR2",
+ "BAR3",
+ "BAR4",
+ "BAR5",
+ };
+
+ iom = 0;
+ iop = 0;
+
+ for (i = 0; i < ARRAY_SIZE(bar_names); i++) {
+ if (pci_resource_len(dev, i) != 0 &&
+ pci_resource_start(dev, i) != 0) {
+ flags = pci_resource_flags(dev, i);
+ if (flags & IORESOURCE_MEM) {
+ ret = igbuio_pci_setup_iomem(dev, info, iom,
+ i, bar_names[i]);
+ if (ret != 0)
+ return ret;
+ iom++;
+ } else if (flags & IORESOURCE_IO) {
+ ret = igbuio_pci_setup_ioport(dev, info, iop,
+ i, bar_names[i]);
+ if (ret != 0)
+ return ret;
+ iop++;
+ }
+ }
+ }
+
+ return (iom != 0) ? ret : -ENOENT;
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0)
+static int __devinit
+#else
+static int
+#endif
+igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
+{
+ struct rte_uio_pci_dev *udev;
+ struct msix_entry msix_entry;
+ int err;
+
+ udev = kzalloc(sizeof(struct rte_uio_pci_dev), GFP_KERNEL);
+ if (!udev)
+ return -ENOMEM;
+
+ /*
+ * enable device: ask low-level code to enable I/O and
+ * memory
+ */
+ err = pci_enable_device(dev);
+ if (err != 0) {
+ dev_err(&dev->dev, "Cannot enable PCI device\n");
+ goto fail_free;
+ }
+
+ /*
+ * reserve device's PCI memory regions for use by this
+ * module
+ */
+ err = pci_request_regions(dev, "igb_uio");
+ if (err != 0) {
+ dev_err(&dev->dev, "Cannot request regions\n");
+ goto fail_disable;
+ }
+
+ /* enable bus mastering on the device */
+ pci_set_master(dev);
+
+ /* remap IO memory */
+ err = igbuio_setup_bars(dev, &udev->info);
+ if (err != 0)
+ goto fail_release_iomem;
+
+ /* set 64-bit DMA mask */
+ err = pci_set_dma_mask(dev, DMA_BIT_MASK(64));
+ if (err != 0) {
+ dev_err(&dev->dev, "Cannot set DMA mask\n");
+ goto fail_release_iomem;
+ }
+
+ err = pci_set_consistent_dma_mask(dev, DMA_BIT_MASK(64));
+ if (err != 0) {
+ dev_err(&dev->dev, "Cannot set consistent DMA mask\n");
+ goto fail_release_iomem;
+ }
+
+ /* fill uio infos */
+ udev->info.name = "igb_uio";
+ udev->info.version = "0.1";
+ udev->info.handler = igbuio_pci_irqhandler;
+ udev->info.irqcontrol = igbuio_pci_irqcontrol;
+#ifdef CONFIG_XEN_DOM0
+ /* check if the driver run on Xen Dom0 */
+ if (xen_initial_domain())
+ udev->info.mmap = igbuio_dom0_pci_mmap;
+#endif
+ udev->info.priv = udev;
+ udev->pdev = dev;
+
+ switch (igbuio_intr_mode_preferred) {
+ case RTE_INTR_MODE_MSIX:
+ /* Only 1 msi-x vector needed */
+ msix_entry.entry = 0;
+ if (pci_enable_msix(dev, &msix_entry, 1) == 0) {
+ dev_dbg(&dev->dev, "using MSI-X");
+ udev->info.irq = msix_entry.vector;
+ udev->mode = RTE_INTR_MODE_MSIX;
+ break;
+ }
+ /* fall back to INTX */
+ case RTE_INTR_MODE_LEGACY:
+ if (pci_intx_mask_supported(dev)) {
+ dev_dbg(&dev->dev, "using INTX");
+ udev->info.irq_flags = IRQF_SHARED;
+ udev->info.irq = dev->irq;
+ udev->mode = RTE_INTR_MODE_LEGACY;
+ break;
+ }
+ dev_notice(&dev->dev, "PCI INTX mask not supported\n");
+ /* fall back to no IRQ */
+ case RTE_INTR_MODE_NONE:
+ udev->mode = RTE_INTR_MODE_NONE;
+ udev->info.irq = 0;
+ break;
+
+ default:
+ dev_err(&dev->dev, "invalid IRQ mode %u",
+ igbuio_intr_mode_preferred);
+ err = -EINVAL;
+ goto fail_release_iomem;
+ }
+
+ err = sysfs_create_group(&dev->dev.kobj, &dev_attr_grp);
+ if (err != 0)
+ goto fail_release_iomem;
+
+ /* register uio driver */
+ err = uio_register_device(&dev->dev, &udev->info);
+ if (err != 0)
+ goto fail_remove_group;
+
+ pci_set_drvdata(dev, udev);
+
+ dev_info(&dev->dev, "uio device registered with irq %lx\n",
+ udev->info.irq);
+
+ return 0;
+
+fail_remove_group:
+ sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp);
+fail_release_iomem:
+ igbuio_pci_release_iomem(&udev->info);
+ if (udev->mode == RTE_INTR_MODE_MSIX)
+ pci_disable_msix(udev->pdev);
+ pci_release_regions(dev);
+fail_disable:
+ pci_disable_device(dev);
+fail_free:
+ kfree(udev);
+
+ return err;
+}
+
+static void
+igbuio_pci_remove(struct pci_dev *dev)
+{
+ struct rte_uio_pci_dev *udev = pci_get_drvdata(dev);
+
+ sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp);
+ uio_unregister_device(&udev->info);
+ igbuio_pci_release_iomem(&udev->info);
+ if (udev->mode == RTE_INTR_MODE_MSIX)
+ pci_disable_msix(dev);
+ pci_release_regions(dev);
+ pci_disable_device(dev);
+ pci_set_drvdata(dev, NULL);
+ kfree(udev);
+}
+
+static int
+igbuio_config_intr_mode(char *intr_str)
+{
+ if (!intr_str) {
+ pr_info("Use MSIX interrupt by default\n");
+ return 0;
+ }
+
+ if (!strcmp(intr_str, RTE_INTR_MODE_MSIX_NAME)) {
+ igbuio_intr_mode_preferred = RTE_INTR_MODE_MSIX;
+ pr_info("Use MSIX interrupt\n");
+ } else if (!strcmp(intr_str, RTE_INTR_MODE_LEGACY_NAME)) {
+ igbuio_intr_mode_preferred = RTE_INTR_MODE_LEGACY;
+ pr_info("Use legacy interrupt\n");
+ } else {
+ pr_info("Error: bad parameter - %s\n", intr_str);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static struct pci_driver igbuio_pci_driver = {
+ .name = "igb_uio",
+ .id_table = NULL,
+ .probe = igbuio_pci_probe,
+ .remove = igbuio_pci_remove,
+};
+
+static int __init
+igbuio_pci_init_module(void)
+{
+ int ret;
+
+ ret = igbuio_config_intr_mode(intr_mode);
+ if (ret < 0)
+ return ret;
+
+ return pci_register_driver(&igbuio_pci_driver);
+}
+
+static void __exit
+igbuio_pci_exit_module(void)
+{
+ pci_unregister_driver(&igbuio_pci_driver);
+}
+
+module_init(igbuio_pci_init_module);
+module_exit(igbuio_pci_exit_module);
+
+module_param(intr_mode, charp, S_IRUGO);
+MODULE_PARM_DESC(intr_mode,
+"igb_uio interrupt mode (default=msix):\n"
+" " RTE_INTR_MODE_MSIX_NAME " Use MSIX interrupt\n"
+" " RTE_INTR_MODE_LEGACY_NAME " Use Legacy interrupt\n"
+"\n");
+
+MODULE_DESCRIPTION("UIO driver for Intel IGB PCI cards");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Intel Corporation");
diff --git a/lib/librte_eal/linuxapp/kni/Makefile b/lib/librte_eal/linuxapp/kni/Makefile
new file mode 100644
index 00000000..ac99d3f1
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/Makefile
@@ -0,0 +1,93 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# module name and path
+#
+MODULE = rte_kni
+
+#
+# CFLAGS
+#
+MODULE_CFLAGS += -I$(SRCDIR) --param max-inline-insns-single=50
+MODULE_CFLAGS += -I$(RTE_OUTPUT)/include -I$(SRCDIR)/ethtool/ixgbe -I$(SRCDIR)/ethtool/igb
+MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h
+MODULE_CFLAGS += -Wall -Werror
+
+ifeq ($(shell lsb_release -si 2>/dev/null),Ubuntu)
+MODULE_CFLAGS += -DUBUNTU_RELEASE_CODE=$(shell lsb_release -sr | tr -d .)
+UBUNTU_KERNEL_CODE := $(shell echo `grep UTS_RELEASE $(RTE_KERNELDIR)/include/generated/utsrelease.h \
+ | cut -d '"' -f2 | cut -d- -f1,2 | tr .- $(comma)`,1)
+MODULE_CFLAGS += -D"UBUNTU_KERNEL_CODE=UBUNTU_KERNEL_VERSION($(UBUNTU_KERNEL_CODE))"
+endif
+
+# this lib needs main eal
+DEPDIRS-y += lib/librte_eal/linuxapp/eal
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-y := ethtool/ixgbe/ixgbe_main.c
+SRCS-y += ethtool/ixgbe/ixgbe_api.c
+SRCS-y += ethtool/ixgbe/ixgbe_common.c
+SRCS-y += ethtool/ixgbe/ixgbe_ethtool.c
+SRCS-y += ethtool/ixgbe/ixgbe_82599.c
+SRCS-y += ethtool/ixgbe/ixgbe_82598.c
+SRCS-y += ethtool/ixgbe/ixgbe_x540.c
+SRCS-y += ethtool/ixgbe/ixgbe_phy.c
+SRCS-y += ethtool/ixgbe/kcompat.c
+
+SRCS-y += ethtool/igb/e1000_82575.c
+SRCS-y += ethtool/igb/e1000_i210.c
+SRCS-y += ethtool/igb/e1000_api.c
+SRCS-y += ethtool/igb/e1000_mac.c
+SRCS-y += ethtool/igb/e1000_manage.c
+SRCS-y += ethtool/igb/e1000_mbx.c
+SRCS-y += ethtool/igb/e1000_nvm.c
+SRCS-y += ethtool/igb/e1000_phy.c
+SRCS-y += ethtool/igb/igb_ethtool.c
+SRCS-y += ethtool/igb/igb_hwmon.c
+SRCS-y += ethtool/igb/igb_main.c
+SRCS-y += ethtool/igb/igb_debugfs.c
+SRCS-y += ethtool/igb/igb_param.c
+SRCS-y += ethtool/igb/igb_procfs.c
+SRCS-y += ethtool/igb/igb_vmdq.c
+#SRCS-y += ethtool/igb/igb_ptp.c
+#SRCS-y += ethtool/igb/kcompat.c
+
+SRCS-y += kni_misc.c
+SRCS-y += kni_net.c
+SRCS-y += kni_ethtool.c
+SRCS-$(CONFIG_RTE_KNI_VHOST) += kni_vhost.c
+
+include $(RTE_SDK)/mk/rte.module.mk
diff --git a/lib/librte_eal/linuxapp/kni/compat.h b/lib/librte_eal/linuxapp/kni/compat.h
new file mode 100644
index 00000000..cf100b67
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/compat.h
@@ -0,0 +1,29 @@
+/*
+ * Minimal wrappers to allow compiling kni on older kernels.
+ */
+
+#ifndef RHEL_RELEASE_VERSION
+#define RHEL_RELEASE_VERSION(a, b) (((a) << 8) + (b))
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39) && \
+ (!(defined(RHEL_RELEASE_CODE) && \
+ RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 4)))
+
+#define kstrtoul strict_strtoul
+
+#endif /* < 2.6.39 */
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35)
+
+#define sk_sleep(s) (s)->sk_sleep
+
+#endif /* < 2.6.35 */
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0)
+#define HAVE_IOV_ITER_MSGHDR
+#endif
+
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(4,1,0) )
+#define HAVE_KIOCB_MSG_PARAM
+#endif /* < 4.1.0 */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/README b/lib/librte_eal/linuxapp/kni/ethtool/README
new file mode 100644
index 00000000..2cfefe72
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/README
@@ -0,0 +1,100 @@
+..
+ BSD LICENSE
+
+ Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+Description
+
+In order to support ethtool in Kernel NIC Interface, the standard Linux kernel
+drivers of ixgbe/igb are needed to be reused here. ixgbe-3.9.17 is the version
+modified from in kernel NIC interface kernel module to support ixgbe NIC, and
+igb-3.4.8 is the version modified from in kernel NIC interface kernel module to
+support igb NIC.
+
+The source code package of ixgbe can be downloaded from sourceforge.net as below.
+http://sourceforge.net/projects/e1000/files/ixgbe%20stable/
+Below source files are copied or modified from ixgbe.
+
+ixgbe_82598.h
+ixgbe_82599.c
+ixgbe_82599.h
+ixgbe_api.c
+ixgbe_api.h
+ixgbe_common.c
+ixgbe_common.h
+ixgbe_dcb.h
+ixgbe_ethtool.c
+ixgbe_fcoe.h
+ixgbe.h
+ixgbe_main.c
+ixgbe_mbx.h
+ixgbe_osdep.h
+ixgbe_phy.c
+ixgbe_phy.h
+ixgbe_sriov.h
+ixgbe_type.h
+kcompat.c
+kcompat.h
+
+The source code package of igb can be downloaded from sourceforge.net as below.
+http://sourceforge.net/projects/e1000/files/igb%20stable/
+Below source files are copied or modified from igb.
+
+e1000_82575.c
+e1000_82575.h
+e1000_api.c
+e1000_api.h
+e1000_defines.h
+e1000_hw.h
+e1000_mac.c
+e1000_mac.h
+e1000_manage.c
+e1000_manage.h
+e1000_mbx.c
+e1000_mbx.h
+e1000_nvm.c
+e1000_nvm.h
+e1000_osdep.h
+e1000_phy.c
+e1000_phy.h
+e1000_regs.h
+igb_ethtool.c
+igb.h
+igb_main.c
+igb_param.c
+igb_procfs.c
+igb_regtest.h
+igb_sysfs.c
+igb_vmdq.c
+igb_vmdq.h
+kcompat.c
+kcompat_ethtool.c
+kcompat.h
+
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/COPYING b/lib/librte_eal/linuxapp/kni/ethtool/igb/COPYING
new file mode 100644
index 00000000..5f297e5b
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/COPYING
@@ -0,0 +1,339 @@
+
+"This software program is licensed subject to the GNU General Public License
+(GPL). Version 2, June 1991, available at
+<http://www.fsf.org/copyleft/gpl.html>"
+
+GNU General Public License
+
+Version 2, June 1991
+
+Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+
+Everyone is permitted to copy and distribute verbatim copies of this license
+document, but changing it is not allowed.
+
+Preamble
+
+The licenses for most software are designed to take away your freedom to
+share and change it. By contrast, the GNU General Public License is intended
+to guarantee your freedom to share and change free software--to make sure
+the software is free for all its users. This General Public License applies
+to most of the Free Software Foundation's software and to any other program
+whose authors commit to using it. (Some other Free Software Foundation
+software is covered by the GNU Library General Public License instead.) You
+can apply it to your programs, too.
+
+When we speak of free software, we are referring to freedom, not price. Our
+General Public Licenses are designed to make sure that you have the freedom
+to distribute copies of free software (and charge for this service if you
+wish), that you receive source code or can get it if you want it, that you
+can change the software or use pieces of it in new free programs; and that
+you know you can do these things.
+
+To protect your rights, we need to make restrictions that forbid anyone to
+deny you these rights or to ask you to surrender the rights. These
+restrictions translate to certain responsibilities for you if you distribute
+copies of the software, or if you modify it.
+
+For example, if you distribute copies of such a program, whether gratis or
+for a fee, you must give the recipients all the rights that you have. You
+must make sure that they, too, receive or can get the source code. And you
+must show them these terms so they know their rights.
+
+We protect your rights with two steps: (1) copyright the software, and (2)
+offer you this license which gives you legal permission to copy, distribute
+and/or modify the software.
+
+Also, for each author's protection and ours, we want to make certain that
+everyone understands that there is no warranty for this free software. If
+the software is modified by someone else and passed on, we want its
+recipients to know that what they have is not the original, so that any
+problems introduced by others will not reflect on the original authors'
+reputations.
+
+Finally, any free program is threatened constantly by software patents. We
+wish to avoid the danger that redistributors of a free program will
+individually obtain patent licenses, in effect making the program
+proprietary. To prevent this, we have made it clear that any patent must be
+licensed for everyone's free use or not licensed at all.
+
+The precise terms and conditions for copying, distribution and modification
+follow.
+
+TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+0. This License applies to any program or other work which contains a notice
+ placed by the copyright holder saying it may be distributed under the
+ terms of this General Public License. The "Program", below, refers to any
+ such program or work, and a "work based on the Program" means either the
+ Program or any derivative work under copyright law: that is to say, a
+ work containing the Program or a portion of it, either verbatim or with
+ modifications and/or translated into another language. (Hereinafter,
+ translation is included without limitation in the term "modification".)
+ Each licensee is addressed as "you".
+
+ Activities other than copying, distribution and modification are not
+ covered by this License; they are outside its scope. The act of running
+ the Program is not restricted, and the output from the Program is covered
+ only if its contents constitute a work based on the Program (independent
+ of having been made by running the Program). Whether that is true depends
+ on what the Program does.
+
+1. You may copy and distribute verbatim copies of the Program's source code
+ as you receive it, in any medium, provided that you conspicuously and
+ appropriately publish on each copy an appropriate copyright notice and
+ disclaimer of warranty; keep intact all the notices that refer to this
+ License and to the absence of any warranty; and give any other recipients
+ of the Program a copy of this License along with the Program.
+
+ You may charge a fee for the physical act of transferring a copy, and you
+ may at your option offer warranty protection in exchange for a fee.
+
+2. You may modify your copy or copies of the Program or any portion of it,
+ thus forming a work based on the Program, and copy and distribute such
+ modifications or work under the terms of Section 1 above, provided that
+ you also meet all of these conditions:
+
+ * a) You must cause the modified files to carry prominent notices stating
+ that you changed the files and the date of any change.
+
+ * b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any part
+ thereof, to be licensed as a whole at no charge to all third parties
+ under the terms of this License.
+
+ * c) If the modified program normally reads commands interactively when
+ run, you must cause it, when started running for such interactive
+ use in the most ordinary way, to print or display an announcement
+ including an appropriate copyright notice and a notice that there is
+ no warranty (or else, saying that you provide a warranty) and that
+ users may redistribute the program under these conditions, and
+ telling the user how to view a copy of this License. (Exception: if
+ the Program itself is interactive but does not normally print such
+ an announcement, your work based on the Program is not required to
+ print an announcement.)
+
+ These requirements apply to the modified work as a whole. If identifiable
+ sections of that work are not derived from the Program, and can be
+ reasonably considered independent and separate works in themselves, then
+ this License, and its terms, do not apply to those sections when you
+ distribute them as separate works. But when you distribute the same
+ sections as part of a whole which is a work based on the Program, the
+ distribution of the whole must be on the terms of this License, whose
+ permissions for other licensees extend to the entire whole, and thus to
+ each and every part regardless of who wrote it.
+
+ Thus, it is not the intent of this section to claim rights or contest
+ your rights to work written entirely by you; rather, the intent is to
+ exercise the right to control the distribution of derivative or
+ collective works based on the Program.
+
+ In addition, mere aggregation of another work not based on the Program
+ with the Program (or with a work based on the Program) on a volume of a
+ storage or distribution medium does not bring the other work under the
+ scope of this License.
+
+3. You may copy and distribute the Program (or a work based on it, under
+ Section 2) in object code or executable form under the terms of Sections
+ 1 and 2 above provided that you also do one of the following:
+
+ * a) Accompany it with the complete corresponding machine-readable source
+ code, which must be distributed under the terms of Sections 1 and 2
+ above on a medium customarily used for software interchange; or,
+
+ * b) Accompany it with a written offer, valid for at least three years,
+ to give any third party, for a charge no more than your cost of
+ physically performing source distribution, a complete machine-
+ readable copy of the corresponding source code, to be distributed
+ under the terms of Sections 1 and 2 above on a medium customarily
+ used for software interchange; or,
+
+ * c) Accompany it with the information you received as to the offer to
+ distribute corresponding source code. (This alternative is allowed
+ only for noncommercial distribution and only if you received the
+ program in object code or executable form with such an offer, in
+ accord with Subsection b above.)
+
+ The source code for a work means the preferred form of the work for
+ making modifications to it. For an executable work, complete source code
+ means all the source code for all modules it contains, plus any
+ associated interface definition files, plus the scripts used to control
+ compilation and installation of the executable. However, as a special
+ exception, the source code distributed need not include anything that is
+ normally distributed (in either source or binary form) with the major
+ components (compiler, kernel, and so on) of the operating system on which
+ the executable runs, unless that component itself accompanies the
+ executable.
+
+ If distribution of executable or object code is made by offering access
+ to copy from a designated place, then offering equivalent access to copy
+ the source code from the same place counts as distribution of the source
+ code, even though third parties are not compelled to copy the source
+ along with the object code.
+
+4. You may not copy, modify, sublicense, or distribute the Program except as
+ expressly provided under this License. Any attempt otherwise to copy,
+ modify, sublicense or distribute the Program is void, and will
+ automatically terminate your rights under this License. However, parties
+ who have received copies, or rights, from you under this License will not
+ have their licenses terminated so long as such parties remain in full
+ compliance.
+
+5. You are not required to accept this License, since you have not signed
+ it. However, nothing else grants you permission to modify or distribute
+ the Program or its derivative works. These actions are prohibited by law
+ if you do not accept this License. Therefore, by modifying or
+ distributing the Program (or any work based on the Program), you
+ indicate your acceptance of this License to do so, and all its terms and
+ conditions for copying, distributing or modifying the Program or works
+ based on it.
+
+6. Each time you redistribute the Program (or any work based on the
+ Program), the recipient automatically receives a license from the
+ original licensor to copy, distribute or modify the Program subject to
+ these terms and conditions. You may not impose any further restrictions
+ on the recipients' exercise of the rights granted herein. You are not
+ responsible for enforcing compliance by third parties to this License.
+
+7. If, as a consequence of a court judgment or allegation of patent
+ infringement or for any other reason (not limited to patent issues),
+ conditions are imposed on you (whether by court order, agreement or
+ otherwise) that contradict the conditions of this License, they do not
+ excuse you from the conditions of this License. If you cannot distribute
+ so as to satisfy simultaneously your obligations under this License and
+ any other pertinent obligations, then as a consequence you may not
+ distribute the Program at all. For example, if a patent license would
+ not permit royalty-free redistribution of the Program by all those who
+ receive copies directly or indirectly through you, then the only way you
+ could satisfy both it and this License would be to refrain entirely from
+ distribution of the Program.
+
+ If any portion of this section is held invalid or unenforceable under any
+ particular circumstance, the balance of the section is intended to apply
+ and the section as a whole is intended to apply in other circumstances.
+
+ It is not the purpose of this section to induce you to infringe any
+ patents or other property right claims or to contest validity of any
+ such claims; this section has the sole purpose of protecting the
+ integrity of the free software distribution system, which is implemented
+ by public license practices. Many people have made generous contributions
+ to the wide range of software distributed through that system in
+ reliance on consistent application of that system; it is up to the
+ author/donor to decide if he or she is willing to distribute software
+ through any other system and a licensee cannot impose that choice.
+
+ This section is intended to make thoroughly clear what is believed to be
+ a consequence of the rest of this License.
+
+8. If the distribution and/or use of the Program is restricted in certain
+ countries either by patents or by copyrighted interfaces, the original
+ copyright holder who places the Program under this License may add an
+ explicit geographical distribution limitation excluding those countries,
+ so that distribution is permitted only in or among countries not thus
+ excluded. In such case, this License incorporates the limitation as if
+ written in the body of this License.
+
+9. The Free Software Foundation may publish revised and/or new versions of
+ the General Public License from time to time. Such new versions will be
+ similar in spirit to the present version, but may differ in detail to
+ address new problems or concerns.
+
+ Each version is given a distinguishing version number. If the Program
+ specifies a version number of this License which applies to it and "any
+ later version", you have the option of following the terms and
+ conditions either of that version or of any later version published by
+ the Free Software Foundation. If the Program does not specify a version
+ number of this License, you may choose any version ever published by the
+ Free Software Foundation.
+
+10. If you wish to incorporate parts of the Program into other free programs
+ whose distribution conditions are different, write to the author to ask
+ for permission. For software which is copyrighted by the Free Software
+ Foundation, write to the Free Software Foundation; we sometimes make
+ exceptions for this. Our decision will be guided by the two goals of
+ preserving the free status of all derivatives of our free software and
+ of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+ FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+ OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+ PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER
+ EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
+ ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH
+ YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL
+ NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+ WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+ REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR
+ DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL
+ DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM
+ (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED
+ INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF
+ THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR
+ OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+END OF TERMS AND CONDITIONS
+
+How to Apply These Terms to Your New Programs
+
+If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it free
+software which everyone can redistribute and change under these terms.
+
+To do so, attach the following notices to the program. It is safest to
+attach them to the start of each source file to most effectively convey the
+exclusion of warranty; and each file should have at least the "copyright"
+line and a pointer to where the full notice is found.
+
+one line to give the program's name and an idea of what it does.
+Copyright (C) yyyy name of author
+
+This program is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2 of the License, or (at your option)
+any later version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59
+Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this when
+it starts in an interactive mode:
+
+Gnomovision version 69, Copyright (C) year name of author Gnomovision comes
+with ABSOLUTELY NO WARRANTY; for details type 'show w'. This is free
+software, and you are welcome to redistribute it under certain conditions;
+type 'show c' for details.
+
+The hypothetical commands 'show w' and 'show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may be
+called something other than 'show w' and 'show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+'Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+signature of Ty Coon, 1 April 1989
+Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Library General Public
+License instead of this License.
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c
new file mode 100644
index 00000000..b8c9a13f
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c
@@ -0,0 +1,3665 @@
+/*******************************************************************************
+
+ Intel(R) Gigabit Ethernet Linux driver
+ Copyright(c) 2007-2013 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+/*
+ * 82575EB Gigabit Network Connection
+ * 82575EB Gigabit Backplane Connection
+ * 82575GB Gigabit Network Connection
+ * 82576 Gigabit Network Connection
+ * 82576 Quad Port Gigabit Mezzanine Adapter
+ * 82580 Gigabit Network Connection
+ * I350 Gigabit Network Connection
+ */
+
+#include "e1000_api.h"
+#include "e1000_i210.h"
+
+static s32 e1000_init_phy_params_82575(struct e1000_hw *hw);
+static s32 e1000_init_mac_params_82575(struct e1000_hw *hw);
+static s32 e1000_acquire_phy_82575(struct e1000_hw *hw);
+static void e1000_release_phy_82575(struct e1000_hw *hw);
+static s32 e1000_acquire_nvm_82575(struct e1000_hw *hw);
+static void e1000_release_nvm_82575(struct e1000_hw *hw);
+static s32 e1000_check_for_link_82575(struct e1000_hw *hw);
+static s32 e1000_check_for_link_media_swap(struct e1000_hw *hw);
+static s32 e1000_get_cfg_done_82575(struct e1000_hw *hw);
+static s32 e1000_get_link_up_info_82575(struct e1000_hw *hw, u16 *speed,
+ u16 *duplex);
+static s32 e1000_init_hw_82575(struct e1000_hw *hw);
+static s32 e1000_phy_hw_reset_sgmii_82575(struct e1000_hw *hw);
+static s32 e1000_read_phy_reg_sgmii_82575(struct e1000_hw *hw, u32 offset,
+ u16 *data);
+static s32 e1000_reset_hw_82575(struct e1000_hw *hw);
+static s32 e1000_reset_hw_82580(struct e1000_hw *hw);
+static s32 e1000_read_phy_reg_82580(struct e1000_hw *hw,
+ u32 offset, u16 *data);
+static s32 e1000_write_phy_reg_82580(struct e1000_hw *hw,
+ u32 offset, u16 data);
+static s32 e1000_set_d0_lplu_state_82580(struct e1000_hw *hw,
+ bool active);
+static s32 e1000_set_d3_lplu_state_82580(struct e1000_hw *hw,
+ bool active);
+static s32 e1000_set_d0_lplu_state_82575(struct e1000_hw *hw,
+ bool active);
+static s32 e1000_setup_copper_link_82575(struct e1000_hw *hw);
+static s32 e1000_setup_serdes_link_82575(struct e1000_hw *hw);
+static s32 e1000_get_media_type_82575(struct e1000_hw *hw);
+static s32 e1000_set_sfp_media_type_82575(struct e1000_hw *hw);
+static s32 e1000_valid_led_default_82575(struct e1000_hw *hw, u16 *data);
+static s32 e1000_write_phy_reg_sgmii_82575(struct e1000_hw *hw,
+ u32 offset, u16 data);
+static void e1000_clear_hw_cntrs_82575(struct e1000_hw *hw);
+static s32 e1000_acquire_swfw_sync_82575(struct e1000_hw *hw, u16 mask);
+static s32 e1000_get_pcs_speed_and_duplex_82575(struct e1000_hw *hw,
+ u16 *speed, u16 *duplex);
+static s32 e1000_get_phy_id_82575(struct e1000_hw *hw);
+static void e1000_release_swfw_sync_82575(struct e1000_hw *hw, u16 mask);
+static bool e1000_sgmii_active_82575(struct e1000_hw *hw);
+static s32 e1000_reset_init_script_82575(struct e1000_hw *hw);
+static s32 e1000_read_mac_addr_82575(struct e1000_hw *hw);
+static void e1000_config_collision_dist_82575(struct e1000_hw *hw);
+static void e1000_power_down_phy_copper_82575(struct e1000_hw *hw);
+static void e1000_shutdown_serdes_link_82575(struct e1000_hw *hw);
+static void e1000_power_up_serdes_link_82575(struct e1000_hw *hw);
+static s32 e1000_set_pcie_completion_timeout(struct e1000_hw *hw);
+static s32 e1000_reset_mdicnfg_82580(struct e1000_hw *hw);
+static s32 e1000_validate_nvm_checksum_82580(struct e1000_hw *hw);
+static s32 e1000_update_nvm_checksum_82580(struct e1000_hw *hw);
+static s32 e1000_update_nvm_checksum_with_offset(struct e1000_hw *hw,
+ u16 offset);
+static s32 e1000_validate_nvm_checksum_with_offset(struct e1000_hw *hw,
+ u16 offset);
+static s32 e1000_validate_nvm_checksum_i350(struct e1000_hw *hw);
+static s32 e1000_update_nvm_checksum_i350(struct e1000_hw *hw);
+static void e1000_write_vfta_i350(struct e1000_hw *hw, u32 offset, u32 value);
+static void e1000_clear_vfta_i350(struct e1000_hw *hw);
+
+static void e1000_i2c_start(struct e1000_hw *hw);
+static void e1000_i2c_stop(struct e1000_hw *hw);
+static s32 e1000_clock_in_i2c_byte(struct e1000_hw *hw, u8 *data);
+static s32 e1000_clock_out_i2c_byte(struct e1000_hw *hw, u8 data);
+static s32 e1000_get_i2c_ack(struct e1000_hw *hw);
+static s32 e1000_clock_in_i2c_bit(struct e1000_hw *hw, bool *data);
+static s32 e1000_clock_out_i2c_bit(struct e1000_hw *hw, bool data);
+static void e1000_raise_i2c_clk(struct e1000_hw *hw, u32 *i2cctl);
+static void e1000_lower_i2c_clk(struct e1000_hw *hw, u32 *i2cctl);
+static s32 e1000_set_i2c_data(struct e1000_hw *hw, u32 *i2cctl, bool data);
+static bool e1000_get_i2c_data(u32 *i2cctl);
+
+static const u16 e1000_82580_rxpbs_table[] = {
+ 36, 72, 144, 1, 2, 4, 8, 16, 35, 70, 140 };
+#define E1000_82580_RXPBS_TABLE_SIZE \
+ (sizeof(e1000_82580_rxpbs_table)/sizeof(u16))
+
+
+/**
+ * e1000_sgmii_uses_mdio_82575 - Determine if I2C pins are for external MDIO
+ * @hw: pointer to the HW structure
+ *
+ * Called to determine if the I2C pins are being used for I2C or as an
+ * external MDIO interface since the two options are mutually exclusive.
+ **/
+static bool e1000_sgmii_uses_mdio_82575(struct e1000_hw *hw)
+{
+ u32 reg = 0;
+ bool ext_mdio = false;
+
+ DEBUGFUNC("e1000_sgmii_uses_mdio_82575");
+
+ switch (hw->mac.type) {
+ case e1000_82575:
+ case e1000_82576:
+ reg = E1000_READ_REG(hw, E1000_MDIC);
+ ext_mdio = !!(reg & E1000_MDIC_DEST);
+ break;
+ case e1000_82580:
+ case e1000_i350:
+ case e1000_i354:
+ case e1000_i210:
+ case e1000_i211:
+ reg = E1000_READ_REG(hw, E1000_MDICNFG);
+ ext_mdio = !!(reg & E1000_MDICNFG_EXT_MDIO);
+ break;
+ default:
+ break;
+ }
+ return ext_mdio;
+}
+
+/**
+ * e1000_init_phy_params_82575 - Init PHY func ptrs.
+ * @hw: pointer to the HW structure
+ **/
+static s32 e1000_init_phy_params_82575(struct e1000_hw *hw)
+{
+ struct e1000_phy_info *phy = &hw->phy;
+ s32 ret_val = E1000_SUCCESS;
+ u32 ctrl_ext;
+
+ DEBUGFUNC("e1000_init_phy_params_82575");
+
+ phy->ops.read_i2c_byte = e1000_read_i2c_byte_generic;
+ phy->ops.write_i2c_byte = e1000_write_i2c_byte_generic;
+
+ if (hw->phy.media_type != e1000_media_type_copper) {
+ phy->type = e1000_phy_none;
+ goto out;
+ }
+
+ phy->ops.power_up = e1000_power_up_phy_copper;
+ phy->ops.power_down = e1000_power_down_phy_copper_82575;
+
+ phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT;
+ phy->reset_delay_us = 100;
+
+ phy->ops.acquire = e1000_acquire_phy_82575;
+ phy->ops.check_reset_block = e1000_check_reset_block_generic;
+ phy->ops.commit = e1000_phy_sw_reset_generic;
+ phy->ops.get_cfg_done = e1000_get_cfg_done_82575;
+ phy->ops.release = e1000_release_phy_82575;
+
+ ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
+
+ if (e1000_sgmii_active_82575(hw)) {
+ phy->ops.reset = e1000_phy_hw_reset_sgmii_82575;
+ ctrl_ext |= E1000_CTRL_I2C_ENA;
+ } else {
+ phy->ops.reset = e1000_phy_hw_reset_generic;
+ ctrl_ext &= ~E1000_CTRL_I2C_ENA;
+ }
+
+ E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext);
+ e1000_reset_mdicnfg_82580(hw);
+
+ if (e1000_sgmii_active_82575(hw) && !e1000_sgmii_uses_mdio_82575(hw)) {
+ phy->ops.read_reg = e1000_read_phy_reg_sgmii_82575;
+ phy->ops.write_reg = e1000_write_phy_reg_sgmii_82575;
+ } else {
+ switch (hw->mac.type) {
+ case e1000_82580:
+ case e1000_i350:
+ case e1000_i354:
+ phy->ops.read_reg = e1000_read_phy_reg_82580;
+ phy->ops.write_reg = e1000_write_phy_reg_82580;
+ break;
+ case e1000_i210:
+ case e1000_i211:
+ phy->ops.read_reg = e1000_read_phy_reg_gs40g;
+ phy->ops.write_reg = e1000_write_phy_reg_gs40g;
+ break;
+ default:
+ phy->ops.read_reg = e1000_read_phy_reg_igp;
+ phy->ops.write_reg = e1000_write_phy_reg_igp;
+ }
+ }
+
+ /* Set phy->phy_addr and phy->id. */
+ ret_val = e1000_get_phy_id_82575(hw);
+
+ /* Verify phy id and set remaining function pointers */
+ switch (phy->id) {
+ case M88E1543_E_PHY_ID:
+ case I347AT4_E_PHY_ID:
+ case M88E1112_E_PHY_ID:
+ case M88E1340M_E_PHY_ID:
+ case M88E1111_I_PHY_ID:
+ phy->type = e1000_phy_m88;
+ phy->ops.check_polarity = e1000_check_polarity_m88;
+ phy->ops.get_info = e1000_get_phy_info_m88;
+ if (phy->id == I347AT4_E_PHY_ID ||
+ phy->id == M88E1112_E_PHY_ID ||
+ phy->id == M88E1340M_E_PHY_ID)
+ phy->ops.get_cable_length =
+ e1000_get_cable_length_m88_gen2;
+ else if (phy->id == M88E1543_E_PHY_ID)
+ phy->ops.get_cable_length =
+ e1000_get_cable_length_m88_gen2;
+ else
+ phy->ops.get_cable_length = e1000_get_cable_length_m88;
+ phy->ops.force_speed_duplex = e1000_phy_force_speed_duplex_m88;
+ /* Check if this PHY is confgured for media swap. */
+ if (phy->id == M88E1112_E_PHY_ID) {
+ u16 data;
+
+ ret_val = phy->ops.write_reg(hw,
+ E1000_M88E1112_PAGE_ADDR,
+ 2);
+ if (ret_val)
+ goto out;
+
+ ret_val = phy->ops.read_reg(hw,
+ E1000_M88E1112_MAC_CTRL_1,
+ &data);
+ if (ret_val)
+ goto out;
+
+ data = (data & E1000_M88E1112_MAC_CTRL_1_MODE_MASK) >>
+ E1000_M88E1112_MAC_CTRL_1_MODE_SHIFT;
+ if (data == E1000_M88E1112_AUTO_COPPER_SGMII ||
+ data == E1000_M88E1112_AUTO_COPPER_BASEX)
+ hw->mac.ops.check_for_link =
+ e1000_check_for_link_media_swap;
+ }
+ break;
+ case IGP03E1000_E_PHY_ID:
+ case IGP04E1000_E_PHY_ID:
+ phy->type = e1000_phy_igp_3;
+ phy->ops.check_polarity = e1000_check_polarity_igp;
+ phy->ops.get_info = e1000_get_phy_info_igp;
+ phy->ops.get_cable_length = e1000_get_cable_length_igp_2;
+ phy->ops.force_speed_duplex = e1000_phy_force_speed_duplex_igp;
+ phy->ops.set_d0_lplu_state = e1000_set_d0_lplu_state_82575;
+ phy->ops.set_d3_lplu_state = e1000_set_d3_lplu_state_generic;
+ break;
+ case I82580_I_PHY_ID:
+ case I350_I_PHY_ID:
+ phy->type = e1000_phy_82580;
+ phy->ops.check_polarity = e1000_check_polarity_82577;
+ phy->ops.force_speed_duplex =
+ e1000_phy_force_speed_duplex_82577;
+ phy->ops.get_cable_length = e1000_get_cable_length_82577;
+ phy->ops.get_info = e1000_get_phy_info_82577;
+ phy->ops.set_d0_lplu_state = e1000_set_d0_lplu_state_82580;
+ phy->ops.set_d3_lplu_state = e1000_set_d3_lplu_state_82580;
+ break;
+ case I210_I_PHY_ID:
+ phy->type = e1000_phy_i210;
+ phy->ops.check_polarity = e1000_check_polarity_m88;
+ phy->ops.get_info = e1000_get_phy_info_m88;
+ phy->ops.get_cable_length = e1000_get_cable_length_m88_gen2;
+ phy->ops.set_d0_lplu_state = e1000_set_d0_lplu_state_82580;
+ phy->ops.set_d3_lplu_state = e1000_set_d3_lplu_state_82580;
+ phy->ops.force_speed_duplex = e1000_phy_force_speed_duplex_m88;
+ break;
+ default:
+ ret_val = -E1000_ERR_PHY;
+ goto out;
+ }
+
+out:
+ return ret_val;
+}
+
+/**
+ * e1000_init_nvm_params_82575 - Init NVM func ptrs.
+ * @hw: pointer to the HW structure
+ **/
+s32 e1000_init_nvm_params_82575(struct e1000_hw *hw)
+{
+ struct e1000_nvm_info *nvm = &hw->nvm;
+ u32 eecd = E1000_READ_REG(hw, E1000_EECD);
+ u16 size;
+
+ DEBUGFUNC("e1000_init_nvm_params_82575");
+
+ size = (u16)((eecd & E1000_EECD_SIZE_EX_MASK) >>
+ E1000_EECD_SIZE_EX_SHIFT);
+ /*
+ * Added to a constant, "size" becomes the left-shift value
+ * for setting word_size.
+ */
+ size += NVM_WORD_SIZE_BASE_SHIFT;
+
+ /* Just in case size is out of range, cap it to the largest
+ * EEPROM size supported
+ */
+ if (size > 15)
+ size = 15;
+
+ nvm->word_size = 1 << size;
+ if (hw->mac.type < e1000_i210) {
+ nvm->opcode_bits = 8;
+ nvm->delay_usec = 1;
+
+ switch (nvm->override) {
+ case e1000_nvm_override_spi_large:
+ nvm->page_size = 32;
+ nvm->address_bits = 16;
+ break;
+ case e1000_nvm_override_spi_small:
+ nvm->page_size = 8;
+ nvm->address_bits = 8;
+ break;
+ default:
+ nvm->page_size = eecd & E1000_EECD_ADDR_BITS ? 32 : 8;
+ nvm->address_bits = eecd & E1000_EECD_ADDR_BITS ?
+ 16 : 8;
+ break;
+ }
+ if (nvm->word_size == (1 << 15))
+ nvm->page_size = 128;
+
+ nvm->type = e1000_nvm_eeprom_spi;
+ } else {
+ nvm->type = e1000_nvm_flash_hw;
+ }
+
+ /* Function Pointers */
+ nvm->ops.acquire = e1000_acquire_nvm_82575;
+ nvm->ops.release = e1000_release_nvm_82575;
+ if (nvm->word_size < (1 << 15))
+ nvm->ops.read = e1000_read_nvm_eerd;
+ else
+ nvm->ops.read = e1000_read_nvm_spi;
+
+ nvm->ops.write = e1000_write_nvm_spi;
+ nvm->ops.validate = e1000_validate_nvm_checksum_generic;
+ nvm->ops.update = e1000_update_nvm_checksum_generic;
+ nvm->ops.valid_led_default = e1000_valid_led_default_82575;
+
+ /* override generic family function pointers for specific descendants */
+ switch (hw->mac.type) {
+ case e1000_82580:
+ nvm->ops.validate = e1000_validate_nvm_checksum_82580;
+ nvm->ops.update = e1000_update_nvm_checksum_82580;
+ break;
+ case e1000_i350:
+ //case e1000_i354:
+ nvm->ops.validate = e1000_validate_nvm_checksum_i350;
+ nvm->ops.update = e1000_update_nvm_checksum_i350;
+ break;
+ default:
+ break;
+ }
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_init_mac_params_82575 - Init MAC func ptrs.
+ * @hw: pointer to the HW structure
+ **/
+static s32 e1000_init_mac_params_82575(struct e1000_hw *hw)
+{
+ struct e1000_mac_info *mac = &hw->mac;
+ struct e1000_dev_spec_82575 *dev_spec = &hw->dev_spec._82575;
+
+ DEBUGFUNC("e1000_init_mac_params_82575");
+
+ /* Derives media type */
+ e1000_get_media_type_82575(hw);
+ /* Set mta register count */
+ mac->mta_reg_count = 128;
+ /* Set uta register count */
+ mac->uta_reg_count = (hw->mac.type == e1000_82575) ? 0 : 128;
+ /* Set rar entry count */
+ mac->rar_entry_count = E1000_RAR_ENTRIES_82575;
+ if (mac->type == e1000_82576)
+ mac->rar_entry_count = E1000_RAR_ENTRIES_82576;
+ if (mac->type == e1000_82580)
+ mac->rar_entry_count = E1000_RAR_ENTRIES_82580;
+ if (mac->type == e1000_i350 || mac->type == e1000_i354)
+ mac->rar_entry_count = E1000_RAR_ENTRIES_I350;
+
+ /* Enable EEE default settings for EEE supported devices */
+ if (mac->type >= e1000_i350)
+ dev_spec->eee_disable = false;
+
+ /* Allow a single clear of the SW semaphore on I210 and newer */
+ if (mac->type >= e1000_i210)
+ dev_spec->clear_semaphore_once = true;
+
+ /* Set if part includes ASF firmware */
+ mac->asf_firmware_present = true;
+ /* FWSM register */
+ mac->has_fwsm = true;
+ /* ARC supported; valid only if manageability features are enabled. */
+ mac->arc_subsystem_valid =
+ !!(E1000_READ_REG(hw, E1000_FWSM) & E1000_FWSM_MODE_MASK);
+
+ /* Function pointers */
+
+ /* bus type/speed/width */
+ mac->ops.get_bus_info = e1000_get_bus_info_pcie_generic;
+ /* reset */
+ if (mac->type >= e1000_82580)
+ mac->ops.reset_hw = e1000_reset_hw_82580;
+ else
+ mac->ops.reset_hw = e1000_reset_hw_82575;
+ /* hw initialization */
+ mac->ops.init_hw = e1000_init_hw_82575;
+ /* link setup */
+ mac->ops.setup_link = e1000_setup_link_generic;
+ /* physical interface link setup */
+ mac->ops.setup_physical_interface =
+ (hw->phy.media_type == e1000_media_type_copper)
+ ? e1000_setup_copper_link_82575 : e1000_setup_serdes_link_82575;
+ /* physical interface shutdown */
+ mac->ops.shutdown_serdes = e1000_shutdown_serdes_link_82575;
+ /* physical interface power up */
+ mac->ops.power_up_serdes = e1000_power_up_serdes_link_82575;
+ /* check for link */
+ mac->ops.check_for_link = e1000_check_for_link_82575;
+ /* read mac address */
+ mac->ops.read_mac_addr = e1000_read_mac_addr_82575;
+ /* configure collision distance */
+ mac->ops.config_collision_dist = e1000_config_collision_dist_82575;
+ /* multicast address update */
+ mac->ops.update_mc_addr_list = e1000_update_mc_addr_list_generic;
+ if (hw->mac.type == e1000_i350 || mac->type == e1000_i354) {
+ /* writing VFTA */
+ mac->ops.write_vfta = e1000_write_vfta_i350;
+ /* clearing VFTA */
+ mac->ops.clear_vfta = e1000_clear_vfta_i350;
+ } else {
+ /* writing VFTA */
+ mac->ops.write_vfta = e1000_write_vfta_generic;
+ /* clearing VFTA */
+ mac->ops.clear_vfta = e1000_clear_vfta_generic;
+ }
+ if (hw->mac.type >= e1000_82580)
+ mac->ops.validate_mdi_setting =
+ e1000_validate_mdi_setting_crossover_generic;
+ /* ID LED init */
+ mac->ops.id_led_init = e1000_id_led_init_generic;
+ /* blink LED */
+ mac->ops.blink_led = e1000_blink_led_generic;
+ /* setup LED */
+ mac->ops.setup_led = e1000_setup_led_generic;
+ /* cleanup LED */
+ mac->ops.cleanup_led = e1000_cleanup_led_generic;
+ /* turn on/off LED */
+ mac->ops.led_on = e1000_led_on_generic;
+ mac->ops.led_off = e1000_led_off_generic;
+ /* clear hardware counters */
+ mac->ops.clear_hw_cntrs = e1000_clear_hw_cntrs_82575;
+ /* link info */
+ mac->ops.get_link_up_info = e1000_get_link_up_info_82575;
+ /* get thermal sensor data */
+ mac->ops.get_thermal_sensor_data =
+ e1000_get_thermal_sensor_data_generic;
+ mac->ops.init_thermal_sensor_thresh =
+ e1000_init_thermal_sensor_thresh_generic;
+ /* acquire SW_FW sync */
+ mac->ops.acquire_swfw_sync = e1000_acquire_swfw_sync_82575;
+ mac->ops.release_swfw_sync = e1000_release_swfw_sync_82575;
+ if (mac->type >= e1000_i210) {
+ mac->ops.acquire_swfw_sync = e1000_acquire_swfw_sync_i210;
+ mac->ops.release_swfw_sync = e1000_release_swfw_sync_i210;
+ }
+
+ /* set lan id for port to determine which phy lock to use */
+ hw->mac.ops.set_lan_id(hw);
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_init_function_pointers_82575 - Init func ptrs.
+ * @hw: pointer to the HW structure
+ *
+ * Called to initialize all function pointers and parameters.
+ **/
+void e1000_init_function_pointers_82575(struct e1000_hw *hw)
+{
+ DEBUGFUNC("e1000_init_function_pointers_82575");
+
+ hw->mac.ops.init_params = e1000_init_mac_params_82575;
+ hw->nvm.ops.init_params = e1000_init_nvm_params_82575;
+ hw->phy.ops.init_params = e1000_init_phy_params_82575;
+ hw->mbx.ops.init_params = e1000_init_mbx_params_pf;
+}
+
+/**
+ * e1000_acquire_phy_82575 - Acquire rights to access PHY
+ * @hw: pointer to the HW structure
+ *
+ * Acquire access rights to the correct PHY.
+ **/
+static s32 e1000_acquire_phy_82575(struct e1000_hw *hw)
+{
+ u16 mask = E1000_SWFW_PHY0_SM;
+
+ DEBUGFUNC("e1000_acquire_phy_82575");
+
+ if (hw->bus.func == E1000_FUNC_1)
+ mask = E1000_SWFW_PHY1_SM;
+ else if (hw->bus.func == E1000_FUNC_2)
+ mask = E1000_SWFW_PHY2_SM;
+ else if (hw->bus.func == E1000_FUNC_3)
+ mask = E1000_SWFW_PHY3_SM;
+
+ return hw->mac.ops.acquire_swfw_sync(hw, mask);
+}
+
+/**
+ * e1000_release_phy_82575 - Release rights to access PHY
+ * @hw: pointer to the HW structure
+ *
+ * A wrapper to release access rights to the correct PHY.
+ **/
+static void e1000_release_phy_82575(struct e1000_hw *hw)
+{
+ u16 mask = E1000_SWFW_PHY0_SM;
+
+ DEBUGFUNC("e1000_release_phy_82575");
+
+ if (hw->bus.func == E1000_FUNC_1)
+ mask = E1000_SWFW_PHY1_SM;
+ else if (hw->bus.func == E1000_FUNC_2)
+ mask = E1000_SWFW_PHY2_SM;
+ else if (hw->bus.func == E1000_FUNC_3)
+ mask = E1000_SWFW_PHY3_SM;
+
+ hw->mac.ops.release_swfw_sync(hw, mask);
+}
+
+/**
+ * e1000_read_phy_reg_sgmii_82575 - Read PHY register using sgmii
+ * @hw: pointer to the HW structure
+ * @offset: register offset to be read
+ * @data: pointer to the read data
+ *
+ * Reads the PHY register at offset using the serial gigabit media independent
+ * interface and stores the retrieved information in data.
+ **/
+static s32 e1000_read_phy_reg_sgmii_82575(struct e1000_hw *hw, u32 offset,
+ u16 *data)
+{
+ s32 ret_val = -E1000_ERR_PARAM;
+
+ DEBUGFUNC("e1000_read_phy_reg_sgmii_82575");
+
+ if (offset > E1000_MAX_SGMII_PHY_REG_ADDR) {
+ DEBUGOUT1("PHY Address %u is out of range\n", offset);
+ goto out;
+ }
+
+ ret_val = hw->phy.ops.acquire(hw);
+ if (ret_val)
+ goto out;
+
+ ret_val = e1000_read_phy_reg_i2c(hw, offset, data);
+
+ hw->phy.ops.release(hw);
+
+out:
+ return ret_val;
+}
+
+/**
+ * e1000_write_phy_reg_sgmii_82575 - Write PHY register using sgmii
+ * @hw: pointer to the HW structure
+ * @offset: register offset to write to
+ * @data: data to write at register offset
+ *
+ * Writes the data to PHY register at the offset using the serial gigabit
+ * media independent interface.
+ **/
+static s32 e1000_write_phy_reg_sgmii_82575(struct e1000_hw *hw, u32 offset,
+ u16 data)
+{
+ s32 ret_val = -E1000_ERR_PARAM;
+
+ DEBUGFUNC("e1000_write_phy_reg_sgmii_82575");
+
+ if (offset > E1000_MAX_SGMII_PHY_REG_ADDR) {
+ DEBUGOUT1("PHY Address %d is out of range\n", offset);
+ goto out;
+ }
+
+ ret_val = hw->phy.ops.acquire(hw);
+ if (ret_val)
+ goto out;
+
+ ret_val = e1000_write_phy_reg_i2c(hw, offset, data);
+
+ hw->phy.ops.release(hw);
+
+out:
+ return ret_val;
+}
+
+/**
+ * e1000_get_phy_id_82575 - Retrieve PHY addr and id
+ * @hw: pointer to the HW structure
+ *
+ * Retrieves the PHY address and ID for both PHY's which do and do not use
+ * sgmi interface.
+ **/
+static s32 e1000_get_phy_id_82575(struct e1000_hw *hw)
+{
+ struct e1000_phy_info *phy = &hw->phy;
+ s32 ret_val = E1000_SUCCESS;
+ u16 phy_id;
+ u32 ctrl_ext;
+ u32 mdic;
+
+ DEBUGFUNC("e1000_get_phy_id_82575");
+
+ /* i354 devices can have a PHY that needs an extra read for id */
+ if (hw->mac.type == e1000_i354)
+ e1000_get_phy_id(hw);
+
+
+ /*
+ * For SGMII PHYs, we try the list of possible addresses until
+ * we find one that works. For non-SGMII PHYs
+ * (e.g. integrated copper PHYs), an address of 1 should
+ * work. The result of this function should mean phy->phy_addr
+ * and phy->id are set correctly.
+ */
+ if (!e1000_sgmii_active_82575(hw)) {
+ phy->addr = 1;
+ ret_val = e1000_get_phy_id(hw);
+ goto out;
+ }
+
+ if (e1000_sgmii_uses_mdio_82575(hw)) {
+ switch (hw->mac.type) {
+ case e1000_82575:
+ case e1000_82576:
+ mdic = E1000_READ_REG(hw, E1000_MDIC);
+ mdic &= E1000_MDIC_PHY_MASK;
+ phy->addr = mdic >> E1000_MDIC_PHY_SHIFT;
+ break;
+ case e1000_82580:
+ case e1000_i350:
+ case e1000_i354:
+ case e1000_i210:
+ case e1000_i211:
+ mdic = E1000_READ_REG(hw, E1000_MDICNFG);
+ mdic &= E1000_MDICNFG_PHY_MASK;
+ phy->addr = mdic >> E1000_MDICNFG_PHY_SHIFT;
+ break;
+ default:
+ ret_val = -E1000_ERR_PHY;
+ goto out;
+ break;
+ }
+ ret_val = e1000_get_phy_id(hw);
+ goto out;
+ }
+
+ /* Power on sgmii phy if it is disabled */
+ ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
+ E1000_WRITE_REG(hw, E1000_CTRL_EXT,
+ ctrl_ext & ~E1000_CTRL_EXT_SDP3_DATA);
+ E1000_WRITE_FLUSH(hw);
+ msec_delay(300);
+
+ /*
+ * The address field in the I2CCMD register is 3 bits and 0 is invalid.
+ * Therefore, we need to test 1-7
+ */
+ for (phy->addr = 1; phy->addr < 8; phy->addr++) {
+ ret_val = e1000_read_phy_reg_sgmii_82575(hw, PHY_ID1, &phy_id);
+ if (ret_val == E1000_SUCCESS) {
+ DEBUGOUT2("Vendor ID 0x%08X read at address %u\n",
+ phy_id, phy->addr);
+ /*
+ * At the time of this writing, The M88 part is
+ * the only supported SGMII PHY product.
+ */
+ if (phy_id == M88_VENDOR)
+ break;
+ } else {
+ DEBUGOUT1("PHY address %u was unreadable\n",
+ phy->addr);
+ }
+ }
+
+ /* A valid PHY type couldn't be found. */
+ if (phy->addr == 8) {
+ phy->addr = 0;
+ ret_val = -E1000_ERR_PHY;
+ } else {
+ ret_val = e1000_get_phy_id(hw);
+ }
+
+ /* restore previous sfp cage power state */
+ E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext);
+
+out:
+ return ret_val;
+}
+
+/**
+ * e1000_phy_hw_reset_sgmii_82575 - Performs a PHY reset
+ * @hw: pointer to the HW structure
+ *
+ * Resets the PHY using the serial gigabit media independent interface.
+ **/
+static s32 e1000_phy_hw_reset_sgmii_82575(struct e1000_hw *hw)
+{
+ s32 ret_val = E1000_SUCCESS;
+
+ DEBUGFUNC("e1000_phy_hw_reset_sgmii_82575");
+
+ /*
+ * This isn't a true "hard" reset, but is the only reset
+ * available to us at this time.
+ */
+
+ DEBUGOUT("Soft resetting SGMII attached PHY...\n");
+
+ if (!(hw->phy.ops.write_reg))
+ goto out;
+
+ /*
+ * SFP documentation requires the following to configure the SPF module
+ * to work on SGMII. No further documentation is given.
+ */
+ ret_val = hw->phy.ops.write_reg(hw, 0x1B, 0x8084);
+ if (ret_val)
+ goto out;
+
+ ret_val = hw->phy.ops.commit(hw);
+
+out:
+ return ret_val;
+}
+
+/**
+ * e1000_set_d0_lplu_state_82575 - Set Low Power Linkup D0 state
+ * @hw: pointer to the HW structure
+ * @active: true to enable LPLU, false to disable
+ *
+ * Sets the LPLU D0 state according to the active flag. When
+ * activating LPLU this function also disables smart speed
+ * and vice versa. LPLU will not be activated unless the
+ * device autonegotiation advertisement meets standards of
+ * either 10 or 10/100 or 10/100/1000 at all duplexes.
+ * This is a function pointer entry point only called by
+ * PHY setup routines.
+ **/
+static s32 e1000_set_d0_lplu_state_82575(struct e1000_hw *hw, bool active)
+{
+ struct e1000_phy_info *phy = &hw->phy;
+ s32 ret_val = E1000_SUCCESS;
+ u16 data;
+
+ DEBUGFUNC("e1000_set_d0_lplu_state_82575");
+
+ if (!(hw->phy.ops.read_reg))
+ goto out;
+
+ ret_val = phy->ops.read_reg(hw, IGP02E1000_PHY_POWER_MGMT, &data);
+ if (ret_val)
+ goto out;
+
+ if (active) {
+ data |= IGP02E1000_PM_D0_LPLU;
+ ret_val = phy->ops.write_reg(hw, IGP02E1000_PHY_POWER_MGMT,
+ data);
+ if (ret_val)
+ goto out;
+
+ /* When LPLU is enabled, we should disable SmartSpeed */
+ ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CONFIG,
+ &data);
+ data &= ~IGP01E1000_PSCFR_SMART_SPEED;
+ ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CONFIG,
+ data);
+ if (ret_val)
+ goto out;
+ } else {
+ data &= ~IGP02E1000_PM_D0_LPLU;
+ ret_val = phy->ops.write_reg(hw, IGP02E1000_PHY_POWER_MGMT,
+ data);
+ /*
+ * LPLU and SmartSpeed are mutually exclusive. LPLU is used
+ * during Dx states where the power conservation is most
+ * important. During driver activity we should enable
+ * SmartSpeed, so performance is maintained.
+ */
+ if (phy->smart_speed == e1000_smart_speed_on) {
+ ret_val = phy->ops.read_reg(hw,
+ IGP01E1000_PHY_PORT_CONFIG,
+ &data);
+ if (ret_val)
+ goto out;
+
+ data |= IGP01E1000_PSCFR_SMART_SPEED;
+ ret_val = phy->ops.write_reg(hw,
+ IGP01E1000_PHY_PORT_CONFIG,
+ data);
+ if (ret_val)
+ goto out;
+ } else if (phy->smart_speed == e1000_smart_speed_off) {
+ ret_val = phy->ops.read_reg(hw,
+ IGP01E1000_PHY_PORT_CONFIG,
+ &data);
+ if (ret_val)
+ goto out;
+
+ data &= ~IGP01E1000_PSCFR_SMART_SPEED;
+ ret_val = phy->ops.write_reg(hw,
+ IGP01E1000_PHY_PORT_CONFIG,
+ data);
+ if (ret_val)
+ goto out;
+ }
+ }
+
+out:
+ return ret_val;
+}
+
+/**
+ * e1000_set_d0_lplu_state_82580 - Set Low Power Linkup D0 state
+ * @hw: pointer to the HW structure
+ * @active: true to enable LPLU, false to disable
+ *
+ * Sets the LPLU D0 state according to the active flag. When
+ * activating LPLU this function also disables smart speed
+ * and vice versa. LPLU will not be activated unless the
+ * device autonegotiation advertisement meets standards of
+ * either 10 or 10/100 or 10/100/1000 at all duplexes.
+ * This is a function pointer entry point only called by
+ * PHY setup routines.
+ **/
+static s32 e1000_set_d0_lplu_state_82580(struct e1000_hw *hw, bool active)
+{
+ struct e1000_phy_info *phy = &hw->phy;
+ s32 ret_val = E1000_SUCCESS;
+ u32 data;
+
+ DEBUGFUNC("e1000_set_d0_lplu_state_82580");
+
+ data = E1000_READ_REG(hw, E1000_82580_PHY_POWER_MGMT);
+
+ if (active) {
+ data |= E1000_82580_PM_D0_LPLU;
+
+ /* When LPLU is enabled, we should disable SmartSpeed */
+ data &= ~E1000_82580_PM_SPD;
+ } else {
+ data &= ~E1000_82580_PM_D0_LPLU;
+
+ /*
+ * LPLU and SmartSpeed are mutually exclusive. LPLU is used
+ * during Dx states where the power conservation is most
+ * important. During driver activity we should enable
+ * SmartSpeed, so performance is maintained.
+ */
+ if (phy->smart_speed == e1000_smart_speed_on)
+ data |= E1000_82580_PM_SPD;
+ else if (phy->smart_speed == e1000_smart_speed_off)
+ data &= ~E1000_82580_PM_SPD;
+ }
+
+ E1000_WRITE_REG(hw, E1000_82580_PHY_POWER_MGMT, data);
+ return ret_val;
+}
+
+/**
+ * e1000_set_d3_lplu_state_82580 - Sets low power link up state for D3
+ * @hw: pointer to the HW structure
+ * @active: boolean used to enable/disable lplu
+ *
+ * Success returns 0, Failure returns 1
+ *
+ * The low power link up (lplu) state is set to the power management level D3
+ * and SmartSpeed is disabled when active is true, else clear lplu for D3
+ * and enable Smartspeed. LPLU and Smartspeed are mutually exclusive. LPLU
+ * is used during Dx states where the power conservation is most important.
+ * During driver activity, SmartSpeed should be enabled so performance is
+ * maintained.
+ **/
+s32 e1000_set_d3_lplu_state_82580(struct e1000_hw *hw, bool active)
+{
+ struct e1000_phy_info *phy = &hw->phy;
+ s32 ret_val = E1000_SUCCESS;
+ u32 data;
+
+ DEBUGFUNC("e1000_set_d3_lplu_state_82580");
+
+ data = E1000_READ_REG(hw, E1000_82580_PHY_POWER_MGMT);
+
+ if (!active) {
+ data &= ~E1000_82580_PM_D3_LPLU;
+ /*
+ * LPLU and SmartSpeed are mutually exclusive. LPLU is used
+ * during Dx states where the power conservation is most
+ * important. During driver activity we should enable
+ * SmartSpeed, so performance is maintained.
+ */
+ if (phy->smart_speed == e1000_smart_speed_on)
+ data |= E1000_82580_PM_SPD;
+ else if (phy->smart_speed == e1000_smart_speed_off)
+ data &= ~E1000_82580_PM_SPD;
+ } else if ((phy->autoneg_advertised == E1000_ALL_SPEED_DUPLEX) ||
+ (phy->autoneg_advertised == E1000_ALL_NOT_GIG) ||
+ (phy->autoneg_advertised == E1000_ALL_10_SPEED)) {
+ data |= E1000_82580_PM_D3_LPLU;
+ /* When LPLU is enabled, we should disable SmartSpeed */
+ data &= ~E1000_82580_PM_SPD;
+ }
+
+ E1000_WRITE_REG(hw, E1000_82580_PHY_POWER_MGMT, data);
+ return ret_val;
+}
+
+/**
+ * e1000_acquire_nvm_82575 - Request for access to EEPROM
+ * @hw: pointer to the HW structure
+ *
+ * Acquire the necessary semaphores for exclusive access to the EEPROM.
+ * Set the EEPROM access request bit and wait for EEPROM access grant bit.
+ * Return successful if access grant bit set, else clear the request for
+ * EEPROM access and return -E1000_ERR_NVM (-1).
+ **/
+static s32 e1000_acquire_nvm_82575(struct e1000_hw *hw)
+{
+ s32 ret_val;
+
+ DEBUGFUNC("e1000_acquire_nvm_82575");
+
+ ret_val = e1000_acquire_swfw_sync_82575(hw, E1000_SWFW_EEP_SM);
+ if (ret_val)
+ goto out;
+
+ /*
+ * Check if there is some access
+ * error this access may hook on
+ */
+ if (hw->mac.type == e1000_i350) {
+ u32 eecd = E1000_READ_REG(hw, E1000_EECD);
+ if (eecd & (E1000_EECD_BLOCKED | E1000_EECD_ABORT |
+ E1000_EECD_TIMEOUT)) {
+ /* Clear all access error flags */
+ E1000_WRITE_REG(hw, E1000_EECD, eecd |
+ E1000_EECD_ERROR_CLR);
+ DEBUGOUT("Nvm bit banging access error detected and cleared.\n");
+ }
+ }
+ if (hw->mac.type == e1000_82580) {
+ u32 eecd = E1000_READ_REG(hw, E1000_EECD);
+ if (eecd & E1000_EECD_BLOCKED) {
+ /* Clear access error flag */
+ E1000_WRITE_REG(hw, E1000_EECD, eecd |
+ E1000_EECD_BLOCKED);
+ DEBUGOUT("Nvm bit banging access error detected and cleared.\n");
+ }
+ }
+
+
+ ret_val = e1000_acquire_nvm_generic(hw);
+ if (ret_val)
+ e1000_release_swfw_sync_82575(hw, E1000_SWFW_EEP_SM);
+
+out:
+ return ret_val;
+}
+
+/**
+ * e1000_release_nvm_82575 - Release exclusive access to EEPROM
+ * @hw: pointer to the HW structure
+ *
+ * Stop any current commands to the EEPROM and clear the EEPROM request bit,
+ * then release the semaphores acquired.
+ **/
+static void e1000_release_nvm_82575(struct e1000_hw *hw)
+{
+ DEBUGFUNC("e1000_release_nvm_82575");
+
+ e1000_release_nvm_generic(hw);
+
+ e1000_release_swfw_sync_82575(hw, E1000_SWFW_EEP_SM);
+}
+
+/**
+ * e1000_acquire_swfw_sync_82575 - Acquire SW/FW semaphore
+ * @hw: pointer to the HW structure
+ * @mask: specifies which semaphore to acquire
+ *
+ * Acquire the SW/FW semaphore to access the PHY or NVM. The mask
+ * will also specify which port we're acquiring the lock for.
+ **/
+static s32 e1000_acquire_swfw_sync_82575(struct e1000_hw *hw, u16 mask)
+{
+ u32 swfw_sync;
+ u32 swmask = mask;
+ u32 fwmask = mask << 16;
+ s32 ret_val = E1000_SUCCESS;
+ s32 i = 0, timeout = 200; /* FIXME: find real value to use here */
+
+ DEBUGFUNC("e1000_acquire_swfw_sync_82575");
+
+ while (i < timeout) {
+ if (e1000_get_hw_semaphore_generic(hw)) {
+ ret_val = -E1000_ERR_SWFW_SYNC;
+ goto out;
+ }
+
+ swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC);
+ if (!(swfw_sync & (fwmask | swmask)))
+ break;
+
+ /*
+ * Firmware currently using resource (fwmask)
+ * or other software thread using resource (swmask)
+ */
+ e1000_put_hw_semaphore_generic(hw);
+ msec_delay_irq(5);
+ i++;
+ }
+
+ if (i == timeout) {
+ DEBUGOUT("Driver can't access resource, SW_FW_SYNC timeout.\n");
+ ret_val = -E1000_ERR_SWFW_SYNC;
+ goto out;
+ }
+
+ swfw_sync |= swmask;
+ E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync);
+
+ e1000_put_hw_semaphore_generic(hw);
+
+out:
+ return ret_val;
+}
+
+/**
+ * e1000_release_swfw_sync_82575 - Release SW/FW semaphore
+ * @hw: pointer to the HW structure
+ * @mask: specifies which semaphore to acquire
+ *
+ * Release the SW/FW semaphore used to access the PHY or NVM. The mask
+ * will also specify which port we're releasing the lock for.
+ **/
+static void e1000_release_swfw_sync_82575(struct e1000_hw *hw, u16 mask)
+{
+ u32 swfw_sync;
+
+ DEBUGFUNC("e1000_release_swfw_sync_82575");
+
+ while (e1000_get_hw_semaphore_generic(hw) != E1000_SUCCESS)
+ ; /* Empty */
+
+ swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC);
+ swfw_sync &= ~mask;
+ E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync);
+
+ e1000_put_hw_semaphore_generic(hw);
+}
+
+/**
+ * e1000_get_cfg_done_82575 - Read config done bit
+ * @hw: pointer to the HW structure
+ *
+ * Read the management control register for the config done bit for
+ * completion status. NOTE: silicon which is EEPROM-less will fail trying
+ * to read the config done bit, so an error is *ONLY* logged and returns
+ * E1000_SUCCESS. If we were to return with error, EEPROM-less silicon
+ * would not be able to be reset or change link.
+ **/
+static s32 e1000_get_cfg_done_82575(struct e1000_hw *hw)
+{
+ s32 timeout = PHY_CFG_TIMEOUT;
+ s32 ret_val = E1000_SUCCESS;
+ u32 mask = E1000_NVM_CFG_DONE_PORT_0;
+
+ DEBUGFUNC("e1000_get_cfg_done_82575");
+
+ if (hw->bus.func == E1000_FUNC_1)
+ mask = E1000_NVM_CFG_DONE_PORT_1;
+ else if (hw->bus.func == E1000_FUNC_2)
+ mask = E1000_NVM_CFG_DONE_PORT_2;
+ else if (hw->bus.func == E1000_FUNC_3)
+ mask = E1000_NVM_CFG_DONE_PORT_3;
+ while (timeout) {
+ if (E1000_READ_REG(hw, E1000_EEMNGCTL) & mask)
+ break;
+ msec_delay(1);
+ timeout--;
+ }
+ if (!timeout)
+ DEBUGOUT("MNG configuration cycle has not completed.\n");
+
+ /* If EEPROM is not marked present, init the PHY manually */
+ if (!(E1000_READ_REG(hw, E1000_EECD) & E1000_EECD_PRES) &&
+ (hw->phy.type == e1000_phy_igp_3))
+ e1000_phy_init_script_igp3(hw);
+
+ return ret_val;
+}
+
+/**
+ * e1000_get_link_up_info_82575 - Get link speed/duplex info
+ * @hw: pointer to the HW structure
+ * @speed: stores the current speed
+ * @duplex: stores the current duplex
+ *
+ * This is a wrapper function, if using the serial gigabit media independent
+ * interface, use PCS to retrieve the link speed and duplex information.
+ * Otherwise, use the generic function to get the link speed and duplex info.
+ **/
+static s32 e1000_get_link_up_info_82575(struct e1000_hw *hw, u16 *speed,
+ u16 *duplex)
+{
+ s32 ret_val;
+
+ DEBUGFUNC("e1000_get_link_up_info_82575");
+
+ if (hw->phy.media_type != e1000_media_type_copper)
+ ret_val = e1000_get_pcs_speed_and_duplex_82575(hw, speed,
+ duplex);
+ else
+ ret_val = e1000_get_speed_and_duplex_copper_generic(hw, speed,
+ duplex);
+
+ return ret_val;
+}
+
+/**
+ * e1000_check_for_link_82575 - Check for link
+ * @hw: pointer to the HW structure
+ *
+ * If sgmii is enabled, then use the pcs register to determine link, otherwise
+ * use the generic interface for determining link.
+ **/
+static s32 e1000_check_for_link_82575(struct e1000_hw *hw)
+{
+ s32 ret_val;
+ u16 speed, duplex;
+
+ DEBUGFUNC("e1000_check_for_link_82575");
+
+ if (hw->phy.media_type != e1000_media_type_copper) {
+ ret_val = e1000_get_pcs_speed_and_duplex_82575(hw, &speed,
+ &duplex);
+ /*
+ * Use this flag to determine if link needs to be checked or
+ * not. If we have link clear the flag so that we do not
+ * continue to check for link.
+ */
+ hw->mac.get_link_status = !hw->mac.serdes_has_link;
+
+ /*
+ * Configure Flow Control now that Auto-Neg has completed.
+ * First, we need to restore the desired flow control
+ * settings because we may have had to re-autoneg with a
+ * different link partner.
+ */
+ ret_val = e1000_config_fc_after_link_up_generic(hw);
+ if (ret_val)
+ DEBUGOUT("Error configuring flow control\n");
+ } else {
+ ret_val = e1000_check_for_copper_link_generic(hw);
+ }
+
+ return ret_val;
+}
+
+/**
+ * e1000_check_for_link_media_swap - Check which M88E1112 interface linked
+ * @hw: pointer to the HW structure
+ *
+ * Poll the M88E1112 interfaces to see which interface achieved link.
+ */
+static s32 e1000_check_for_link_media_swap(struct e1000_hw *hw)
+{
+ struct e1000_phy_info *phy = &hw->phy;
+ s32 ret_val;
+ u16 data;
+ u8 port = 0;
+
+ DEBUGFUNC("e1000_check_for_link_media_swap");
+
+ /* Check the copper medium. */
+ ret_val = phy->ops.write_reg(hw, E1000_M88E1112_PAGE_ADDR, 0);
+ if (ret_val)
+ return ret_val;
+
+ ret_val = phy->ops.read_reg(hw, E1000_M88E1112_STATUS, &data);
+ if (ret_val)
+ return ret_val;
+
+ if (data & E1000_M88E1112_STATUS_LINK)
+ port = E1000_MEDIA_PORT_COPPER;
+
+ /* Check the other medium. */
+ ret_val = phy->ops.write_reg(hw, E1000_M88E1112_PAGE_ADDR, 1);
+ if (ret_val)
+ return ret_val;
+
+ ret_val = phy->ops.read_reg(hw, E1000_M88E1112_STATUS, &data);
+ if (ret_val)
+ return ret_val;
+
+ if (data & E1000_M88E1112_STATUS_LINK)
+ port = E1000_MEDIA_PORT_OTHER;
+
+ /* Determine if a swap needs to happen. */
+ if (port && (hw->dev_spec._82575.media_port != port)) {
+ hw->dev_spec._82575.media_port = port;
+ hw->dev_spec._82575.media_changed = true;
+ } else {
+ ret_val = e1000_check_for_link_82575(hw);
+ }
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_power_up_serdes_link_82575 - Power up the serdes link after shutdown
+ * @hw: pointer to the HW structure
+ **/
+static void e1000_power_up_serdes_link_82575(struct e1000_hw *hw)
+{
+ u32 reg;
+
+ DEBUGFUNC("e1000_power_up_serdes_link_82575");
+
+ if ((hw->phy.media_type != e1000_media_type_internal_serdes) &&
+ !e1000_sgmii_active_82575(hw))
+ return;
+
+ /* Enable PCS to turn on link */
+ reg = E1000_READ_REG(hw, E1000_PCS_CFG0);
+ reg |= E1000_PCS_CFG_PCS_EN;
+ E1000_WRITE_REG(hw, E1000_PCS_CFG0, reg);
+
+ /* Power up the laser */
+ reg = E1000_READ_REG(hw, E1000_CTRL_EXT);
+ reg &= ~E1000_CTRL_EXT_SDP3_DATA;
+ E1000_WRITE_REG(hw, E1000_CTRL_EXT, reg);
+
+ /* flush the write to verify completion */
+ E1000_WRITE_FLUSH(hw);
+ msec_delay(1);
+}
+
+/**
+ * e1000_get_pcs_speed_and_duplex_82575 - Retrieve current speed/duplex
+ * @hw: pointer to the HW structure
+ * @speed: stores the current speed
+ * @duplex: stores the current duplex
+ *
+ * Using the physical coding sub-layer (PCS), retrieve the current speed and
+ * duplex, then store the values in the pointers provided.
+ **/
+static s32 e1000_get_pcs_speed_and_duplex_82575(struct e1000_hw *hw,
+ u16 *speed, u16 *duplex)
+{
+ struct e1000_mac_info *mac = &hw->mac;
+ u32 pcs;
+ u32 status;
+
+ DEBUGFUNC("e1000_get_pcs_speed_and_duplex_82575");
+
+ /*
+ * Read the PCS Status register for link state. For non-copper mode,
+ * the status register is not accurate. The PCS status register is
+ * used instead.
+ */
+ pcs = E1000_READ_REG(hw, E1000_PCS_LSTAT);
+
+ /*
+ * The link up bit determines when link is up on autoneg.
+ */
+ if (pcs & E1000_PCS_LSTS_LINK_OK) {
+ mac->serdes_has_link = true;
+
+ /* Detect and store PCS speed */
+ if (pcs & E1000_PCS_LSTS_SPEED_1000)
+ *speed = SPEED_1000;
+ else if (pcs & E1000_PCS_LSTS_SPEED_100)
+ *speed = SPEED_100;
+ else
+ *speed = SPEED_10;
+
+ /* Detect and store PCS duplex */
+ if (pcs & E1000_PCS_LSTS_DUPLEX_FULL)
+ *duplex = FULL_DUPLEX;
+ else
+ *duplex = HALF_DUPLEX;
+
+ /* Check if it is an I354 2.5Gb backplane connection. */
+ if (mac->type == e1000_i354) {
+ status = E1000_READ_REG(hw, E1000_STATUS);
+ if ((status & E1000_STATUS_2P5_SKU) &&
+ !(status & E1000_STATUS_2P5_SKU_OVER)) {
+ *speed = SPEED_2500;
+ *duplex = FULL_DUPLEX;
+ DEBUGOUT("2500 Mbs, ");
+ DEBUGOUT("Full Duplex\n");
+ }
+ }
+
+ } else {
+ mac->serdes_has_link = false;
+ *speed = 0;
+ *duplex = 0;
+ }
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_shutdown_serdes_link_82575 - Remove link during power down
+ * @hw: pointer to the HW structure
+ *
+ * In the case of serdes shut down sfp and PCS on driver unload
+ * when management pass thru is not enabled.
+ **/
+void e1000_shutdown_serdes_link_82575(struct e1000_hw *hw)
+{
+ u32 reg;
+
+ DEBUGFUNC("e1000_shutdown_serdes_link_82575");
+
+ if ((hw->phy.media_type != e1000_media_type_internal_serdes) &&
+ !e1000_sgmii_active_82575(hw))
+ return;
+
+ if (!e1000_enable_mng_pass_thru(hw)) {
+ /* Disable PCS to turn off link */
+ reg = E1000_READ_REG(hw, E1000_PCS_CFG0);
+ reg &= ~E1000_PCS_CFG_PCS_EN;
+ E1000_WRITE_REG(hw, E1000_PCS_CFG0, reg);
+
+ /* shutdown the laser */
+ reg = E1000_READ_REG(hw, E1000_CTRL_EXT);
+ reg |= E1000_CTRL_EXT_SDP3_DATA;
+ E1000_WRITE_REG(hw, E1000_CTRL_EXT, reg);
+
+ /* flush the write to verify completion */
+ E1000_WRITE_FLUSH(hw);
+ msec_delay(1);
+ }
+
+ return;
+}
+
+/**
+ * e1000_reset_hw_82575 - Reset hardware
+ * @hw: pointer to the HW structure
+ *
+ * This resets the hardware into a known state.
+ **/
+static s32 e1000_reset_hw_82575(struct e1000_hw *hw)
+{
+ u32 ctrl;
+ s32 ret_val;
+
+ DEBUGFUNC("e1000_reset_hw_82575");
+
+ /*
+ * Prevent the PCI-E bus from sticking if there is no TLP connection
+ * on the last TLP read/write transaction when MAC is reset.
+ */
+ ret_val = e1000_disable_pcie_master_generic(hw);
+ if (ret_val)
+ DEBUGOUT("PCI-E Master disable polling has failed.\n");
+
+ /* set the completion timeout for interface */
+ ret_val = e1000_set_pcie_completion_timeout(hw);
+ if (ret_val)
+ DEBUGOUT("PCI-E Set completion timeout has failed.\n");
+
+ DEBUGOUT("Masking off all interrupts\n");
+ E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff);
+
+ E1000_WRITE_REG(hw, E1000_RCTL, 0);
+ E1000_WRITE_REG(hw, E1000_TCTL, E1000_TCTL_PSP);
+ E1000_WRITE_FLUSH(hw);
+
+ msec_delay(10);
+
+ ctrl = E1000_READ_REG(hw, E1000_CTRL);
+
+ DEBUGOUT("Issuing a global reset to MAC\n");
+ E1000_WRITE_REG(hw, E1000_CTRL, ctrl | E1000_CTRL_RST);
+
+ ret_val = e1000_get_auto_rd_done_generic(hw);
+ if (ret_val) {
+ /*
+ * When auto config read does not complete, do not
+ * return with an error. This can happen in situations
+ * where there is no eeprom and prevents getting link.
+ */
+ DEBUGOUT("Auto Read Done did not complete\n");
+ }
+
+ /* If EEPROM is not present, run manual init scripts */
+ if (!(E1000_READ_REG(hw, E1000_EECD) & E1000_EECD_PRES))
+ e1000_reset_init_script_82575(hw);
+
+ /* Clear any pending interrupt events. */
+ E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff);
+ E1000_READ_REG(hw, E1000_ICR);
+
+ /* Install any alternate MAC address into RAR0 */
+ ret_val = e1000_check_alt_mac_addr_generic(hw);
+
+ return ret_val;
+}
+
+/**
+ * e1000_init_hw_82575 - Initialize hardware
+ * @hw: pointer to the HW structure
+ *
+ * This inits the hardware readying it for operation.
+ **/
+static s32 e1000_init_hw_82575(struct e1000_hw *hw)
+{
+ struct e1000_mac_info *mac = &hw->mac;
+ s32 ret_val;
+ u16 i, rar_count = mac->rar_entry_count;
+
+ DEBUGFUNC("e1000_init_hw_82575");
+
+ /* Initialize identification LED */
+ ret_val = mac->ops.id_led_init(hw);
+ if (ret_val) {
+ DEBUGOUT("Error initializing identification LED\n");
+ /* This is not fatal and we should not stop init due to this */
+ }
+
+ /* Disabling VLAN filtering */
+ DEBUGOUT("Initializing the IEEE VLAN\n");
+ mac->ops.clear_vfta(hw);
+
+ /* Setup the receive address */
+ e1000_init_rx_addrs_generic(hw, rar_count);
+
+ /* Zero out the Multicast HASH table */
+ DEBUGOUT("Zeroing the MTA\n");
+ for (i = 0; i < mac->mta_reg_count; i++)
+ E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, 0);
+
+ /* Zero out the Unicast HASH table */
+ DEBUGOUT("Zeroing the UTA\n");
+ for (i = 0; i < mac->uta_reg_count; i++)
+ E1000_WRITE_REG_ARRAY(hw, E1000_UTA, i, 0);
+
+ /* Setup link and flow control */
+ ret_val = mac->ops.setup_link(hw);
+
+ /* Set the default MTU size */
+ hw->dev_spec._82575.mtu = 1500;
+
+ /*
+ * Clear all of the statistics registers (clear on read). It is
+ * important that we do this after we have tried to establish link
+ * because the symbol error count will increment wildly if there
+ * is no link.
+ */
+ e1000_clear_hw_cntrs_82575(hw);
+
+ return ret_val;
+}
+
+/**
+ * e1000_setup_copper_link_82575 - Configure copper link settings
+ * @hw: pointer to the HW structure
+ *
+ * Configures the link for auto-neg or forced speed and duplex. Then we check
+ * for link, once link is established calls to configure collision distance
+ * and flow control are called.
+ **/
+static s32 e1000_setup_copper_link_82575(struct e1000_hw *hw)
+{
+ u32 ctrl;
+ s32 ret_val;
+ u32 phpm_reg;
+
+ DEBUGFUNC("e1000_setup_copper_link_82575");
+
+ ctrl = E1000_READ_REG(hw, E1000_CTRL);
+ ctrl |= E1000_CTRL_SLU;
+ ctrl &= ~(E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX);
+ E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
+
+ /* Clear Go Link Disconnect bit on supported devices */
+ switch (hw->mac.type) {
+ case e1000_82580:
+ case e1000_i350:
+ case e1000_i210:
+ case e1000_i211:
+ phpm_reg = E1000_READ_REG(hw, E1000_82580_PHY_POWER_MGMT);
+ phpm_reg &= ~E1000_82580_PM_GO_LINKD;
+ E1000_WRITE_REG(hw, E1000_82580_PHY_POWER_MGMT, phpm_reg);
+ break;
+ default:
+ break;
+ }
+
+ ret_val = e1000_setup_serdes_link_82575(hw);
+ if (ret_val)
+ goto out;
+
+ if (e1000_sgmii_active_82575(hw) && !hw->phy.reset_disable) {
+ /* allow time for SFP cage time to power up phy */
+ msec_delay(300);
+
+ ret_val = hw->phy.ops.reset(hw);
+ if (ret_val) {
+ DEBUGOUT("Error resetting the PHY.\n");
+ goto out;
+ }
+ }
+ switch (hw->phy.type) {
+ case e1000_phy_i210:
+ case e1000_phy_m88:
+ switch (hw->phy.id) {
+ case I347AT4_E_PHY_ID:
+ case M88E1112_E_PHY_ID:
+ case M88E1340M_E_PHY_ID:
+ case M88E1543_E_PHY_ID:
+ case I210_I_PHY_ID:
+ ret_val = e1000_copper_link_setup_m88_gen2(hw);
+ break;
+ default:
+ ret_val = e1000_copper_link_setup_m88(hw);
+ break;
+ }
+ break;
+ case e1000_phy_igp_3:
+ ret_val = e1000_copper_link_setup_igp(hw);
+ break;
+ case e1000_phy_82580:
+ ret_val = e1000_copper_link_setup_82577(hw);
+ break;
+ default:
+ ret_val = -E1000_ERR_PHY;
+ break;
+ }
+
+ if (ret_val)
+ goto out;
+
+ ret_val = e1000_setup_copper_link_generic(hw);
+out:
+ return ret_val;
+}
+
+/**
+ * e1000_setup_serdes_link_82575 - Setup link for serdes
+ * @hw: pointer to the HW structure
+ *
+ * Configure the physical coding sub-layer (PCS) link. The PCS link is
+ * used on copper connections where the serialized gigabit media independent
+ * interface (sgmii), or serdes fiber is being used. Configures the link
+ * for auto-negotiation or forces speed/duplex.
+ **/
+static s32 e1000_setup_serdes_link_82575(struct e1000_hw *hw)
+{
+ u32 ctrl_ext, ctrl_reg, reg, anadv_reg;
+ bool pcs_autoneg;
+ s32 ret_val = E1000_SUCCESS;
+ u16 data;
+
+ DEBUGFUNC("e1000_setup_serdes_link_82575");
+
+ if ((hw->phy.media_type != e1000_media_type_internal_serdes) &&
+ !e1000_sgmii_active_82575(hw))
+ return ret_val;
+
+ /*
+ * On the 82575, SerDes loopback mode persists until it is
+ * explicitly turned off or a power cycle is performed. A read to
+ * the register does not indicate its status. Therefore, we ensure
+ * loopback mode is disabled during initialization.
+ */
+ E1000_WRITE_REG(hw, E1000_SCTL, E1000_SCTL_DISABLE_SERDES_LOOPBACK);
+
+ /* power on the sfp cage if present */
+ ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
+ ctrl_ext &= ~E1000_CTRL_EXT_SDP3_DATA;
+ E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext);
+
+ ctrl_reg = E1000_READ_REG(hw, E1000_CTRL);
+ ctrl_reg |= E1000_CTRL_SLU;
+
+ /* set both sw defined pins on 82575/82576*/
+ if (hw->mac.type == e1000_82575 || hw->mac.type == e1000_82576)
+ ctrl_reg |= E1000_CTRL_SWDPIN0 | E1000_CTRL_SWDPIN1;
+
+ reg = E1000_READ_REG(hw, E1000_PCS_LCTL);
+
+ /* default pcs_autoneg to the same setting as mac autoneg */
+ pcs_autoneg = hw->mac.autoneg;
+
+ switch (ctrl_ext & E1000_CTRL_EXT_LINK_MODE_MASK) {
+ case E1000_CTRL_EXT_LINK_MODE_SGMII:
+ /* sgmii mode lets the phy handle forcing speed/duplex */
+ pcs_autoneg = true;
+ /* autoneg time out should be disabled for SGMII mode */
+ reg &= ~(E1000_PCS_LCTL_AN_TIMEOUT);
+ break;
+ case E1000_CTRL_EXT_LINK_MODE_1000BASE_KX:
+ /* disable PCS autoneg and support parallel detect only */
+ pcs_autoneg = false;
+ /* fall through to default case */
+ default:
+ if (hw->mac.type == e1000_82575 ||
+ hw->mac.type == e1000_82576) {
+ ret_val = hw->nvm.ops.read(hw, NVM_COMPAT, 1, &data);
+ if (ret_val) {
+ DEBUGOUT("NVM Read Error\n");
+ return ret_val;
+ }
+
+ if (data & E1000_EEPROM_PCS_AUTONEG_DISABLE_BIT)
+ pcs_autoneg = false;
+ }
+
+ /*
+ * non-SGMII modes only supports a speed of 1000/Full for the
+ * link so it is best to just force the MAC and let the pcs
+ * link either autoneg or be forced to 1000/Full
+ */
+ ctrl_reg |= E1000_CTRL_SPD_1000 | E1000_CTRL_FRCSPD |
+ E1000_CTRL_FD | E1000_CTRL_FRCDPX;
+
+ /* set speed of 1000/Full if speed/duplex is forced */
+ reg |= E1000_PCS_LCTL_FSV_1000 | E1000_PCS_LCTL_FDV_FULL;
+ break;
+ }
+
+ E1000_WRITE_REG(hw, E1000_CTRL, ctrl_reg);
+
+ /*
+ * New SerDes mode allows for forcing speed or autonegotiating speed
+ * at 1gb. Autoneg should be default set by most drivers. This is the
+ * mode that will be compatible with older link partners and switches.
+ * However, both are supported by the hardware and some drivers/tools.
+ */
+ reg &= ~(E1000_PCS_LCTL_AN_ENABLE | E1000_PCS_LCTL_FLV_LINK_UP |
+ E1000_PCS_LCTL_FSD | E1000_PCS_LCTL_FORCE_LINK);
+
+ if (pcs_autoneg) {
+ /* Set PCS register for autoneg */
+ reg |= E1000_PCS_LCTL_AN_ENABLE | /* Enable Autoneg */
+ E1000_PCS_LCTL_AN_RESTART; /* Restart autoneg */
+
+ /* Disable force flow control for autoneg */
+ reg &= ~E1000_PCS_LCTL_FORCE_FCTRL;
+
+ /* Configure flow control advertisement for autoneg */
+ anadv_reg = E1000_READ_REG(hw, E1000_PCS_ANADV);
+ anadv_reg &= ~(E1000_TXCW_ASM_DIR | E1000_TXCW_PAUSE);
+
+ switch (hw->fc.requested_mode) {
+ case e1000_fc_full:
+ case e1000_fc_rx_pause:
+ anadv_reg |= E1000_TXCW_ASM_DIR;
+ anadv_reg |= E1000_TXCW_PAUSE;
+ break;
+ case e1000_fc_tx_pause:
+ anadv_reg |= E1000_TXCW_ASM_DIR;
+ break;
+ default:
+ break;
+ }
+
+ E1000_WRITE_REG(hw, E1000_PCS_ANADV, anadv_reg);
+
+ DEBUGOUT1("Configuring Autoneg:PCS_LCTL=0x%08X\n", reg);
+ } else {
+ /* Set PCS register for forced link */
+ reg |= E1000_PCS_LCTL_FSD; /* Force Speed */
+
+ /* Force flow control for forced link */
+ reg |= E1000_PCS_LCTL_FORCE_FCTRL;
+
+ DEBUGOUT1("Configuring Forced Link:PCS_LCTL=0x%08X\n", reg);
+ }
+
+ E1000_WRITE_REG(hw, E1000_PCS_LCTL, reg);
+
+ if (!pcs_autoneg && !e1000_sgmii_active_82575(hw))
+ e1000_force_mac_fc_generic(hw);
+
+ return ret_val;
+}
+
+/**
+ * e1000_get_media_type_82575 - derives current media type.
+ * @hw: pointer to the HW structure
+ *
+ * The media type is chosen reflecting few settings.
+ * The following are taken into account:
+ * - link mode set in the current port Init Control Word #3
+ * - current link mode settings in CSR register
+ * - MDIO vs. I2C PHY control interface chosen
+ * - SFP module media type
+ **/
+static s32 e1000_get_media_type_82575(struct e1000_hw *hw)
+{
+ struct e1000_dev_spec_82575 *dev_spec = &hw->dev_spec._82575;
+ s32 ret_val = E1000_SUCCESS;
+ u32 ctrl_ext = 0;
+ u32 link_mode = 0;
+
+ /* Set internal phy as default */
+ dev_spec->sgmii_active = false;
+ dev_spec->module_plugged = false;
+
+ /* Get CSR setting */
+ ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
+
+ /* extract link mode setting */
+ link_mode = ctrl_ext & E1000_CTRL_EXT_LINK_MODE_MASK;
+
+ switch (link_mode) {
+ case E1000_CTRL_EXT_LINK_MODE_1000BASE_KX:
+ hw->phy.media_type = e1000_media_type_internal_serdes;
+ break;
+ case E1000_CTRL_EXT_LINK_MODE_GMII:
+ hw->phy.media_type = e1000_media_type_copper;
+ break;
+ case E1000_CTRL_EXT_LINK_MODE_SGMII:
+ /* Get phy control interface type set (MDIO vs. I2C)*/
+ if (e1000_sgmii_uses_mdio_82575(hw)) {
+ hw->phy.media_type = e1000_media_type_copper;
+ dev_spec->sgmii_active = true;
+ break;
+ }
+ /* fall through for I2C based SGMII */
+ case E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES:
+ /* read media type from SFP EEPROM */
+ ret_val = e1000_set_sfp_media_type_82575(hw);
+ if ((ret_val != E1000_SUCCESS) ||
+ (hw->phy.media_type == e1000_media_type_unknown)) {
+ /*
+ * If media type was not identified then return media
+ * type defined by the CTRL_EXT settings.
+ */
+ hw->phy.media_type = e1000_media_type_internal_serdes;
+
+ if (link_mode == E1000_CTRL_EXT_LINK_MODE_SGMII) {
+ hw->phy.media_type = e1000_media_type_copper;
+ dev_spec->sgmii_active = true;
+ }
+
+ break;
+ }
+
+ /* do not change link mode for 100BaseFX */
+ if (dev_spec->eth_flags.e100_base_fx)
+ break;
+
+ /* change current link mode setting */
+ ctrl_ext &= ~E1000_CTRL_EXT_LINK_MODE_MASK;
+
+ if (hw->phy.media_type == e1000_media_type_copper)
+ ctrl_ext |= E1000_CTRL_EXT_LINK_MODE_SGMII;
+ else
+ ctrl_ext |= E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES;
+
+ E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext);
+
+ break;
+ }
+
+ return ret_val;
+}
+
+/**
+ * e1000_set_sfp_media_type_82575 - derives SFP module media type.
+ * @hw: pointer to the HW structure
+ *
+ * The media type is chosen based on SFP module.
+ * compatibility flags retrieved from SFP ID EEPROM.
+ **/
+static s32 e1000_set_sfp_media_type_82575(struct e1000_hw *hw)
+{
+ s32 ret_val = E1000_ERR_CONFIG;
+ u32 ctrl_ext = 0;
+ struct e1000_dev_spec_82575 *dev_spec = &hw->dev_spec._82575;
+ struct sfp_e1000_flags *eth_flags = &dev_spec->eth_flags;
+ u8 tranceiver_type = 0;
+ s32 timeout = 3;
+
+ /* Turn I2C interface ON and power on sfp cage */
+ ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
+ ctrl_ext &= ~E1000_CTRL_EXT_SDP3_DATA;
+ E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext | E1000_CTRL_I2C_ENA);
+
+ E1000_WRITE_FLUSH(hw);
+
+ /* Read SFP module data */
+ while (timeout) {
+ ret_val = e1000_read_sfp_data_byte(hw,
+ E1000_I2CCMD_SFP_DATA_ADDR(E1000_SFF_IDENTIFIER_OFFSET),
+ &tranceiver_type);
+ if (ret_val == E1000_SUCCESS)
+ break;
+ msec_delay(100);
+ timeout--;
+ }
+ if (ret_val != E1000_SUCCESS)
+ goto out;
+
+ ret_val = e1000_read_sfp_data_byte(hw,
+ E1000_I2CCMD_SFP_DATA_ADDR(E1000_SFF_ETH_FLAGS_OFFSET),
+ (u8 *)eth_flags);
+ if (ret_val != E1000_SUCCESS)
+ goto out;
+
+ /* Check if there is some SFP module plugged and powered */
+ if ((tranceiver_type == E1000_SFF_IDENTIFIER_SFP) ||
+ (tranceiver_type == E1000_SFF_IDENTIFIER_SFF)) {
+ dev_spec->module_plugged = true;
+ if (eth_flags->e1000_base_lx || eth_flags->e1000_base_sx) {
+ hw->phy.media_type = e1000_media_type_internal_serdes;
+ } else if (eth_flags->e100_base_fx) {
+ dev_spec->sgmii_active = true;
+ hw->phy.media_type = e1000_media_type_internal_serdes;
+ } else if (eth_flags->e1000_base_t) {
+ dev_spec->sgmii_active = true;
+ hw->phy.media_type = e1000_media_type_copper;
+ } else {
+ hw->phy.media_type = e1000_media_type_unknown;
+ DEBUGOUT("PHY module has not been recognized\n");
+ goto out;
+ }
+ } else {
+ hw->phy.media_type = e1000_media_type_unknown;
+ }
+ ret_val = E1000_SUCCESS;
+out:
+ /* Restore I2C interface setting */
+ E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext);
+ return ret_val;
+}
+
+/**
+ * e1000_valid_led_default_82575 - Verify a valid default LED config
+ * @hw: pointer to the HW structure
+ * @data: pointer to the NVM (EEPROM)
+ *
+ * Read the EEPROM for the current default LED configuration. If the
+ * LED configuration is not valid, set to a valid LED configuration.
+ **/
+static s32 e1000_valid_led_default_82575(struct e1000_hw *hw, u16 *data)
+{
+ s32 ret_val;
+
+ DEBUGFUNC("e1000_valid_led_default_82575");
+
+ ret_val = hw->nvm.ops.read(hw, NVM_ID_LED_SETTINGS, 1, data);
+ if (ret_val) {
+ DEBUGOUT("NVM Read Error\n");
+ goto out;
+ }
+
+ if (*data == ID_LED_RESERVED_0000 || *data == ID_LED_RESERVED_FFFF) {
+ switch (hw->phy.media_type) {
+ case e1000_media_type_internal_serdes:
+ *data = ID_LED_DEFAULT_82575_SERDES;
+ break;
+ case e1000_media_type_copper:
+ default:
+ *data = ID_LED_DEFAULT;
+ break;
+ }
+ }
+out:
+ return ret_val;
+}
+
+/**
+ * e1000_sgmii_active_82575 - Return sgmii state
+ * @hw: pointer to the HW structure
+ *
+ * 82575 silicon has a serialized gigabit media independent interface (sgmii)
+ * which can be enabled for use in the embedded applications. Simply
+ * return the current state of the sgmii interface.
+ **/
+static bool e1000_sgmii_active_82575(struct e1000_hw *hw)
+{
+ struct e1000_dev_spec_82575 *dev_spec = &hw->dev_spec._82575;
+ return dev_spec->sgmii_active;
+}
+
+/**
+ * e1000_reset_init_script_82575 - Inits HW defaults after reset
+ * @hw: pointer to the HW structure
+ *
+ * Inits recommended HW defaults after a reset when there is no EEPROM
+ * detected. This is only for the 82575.
+ **/
+static s32 e1000_reset_init_script_82575(struct e1000_hw *hw)
+{
+ DEBUGFUNC("e1000_reset_init_script_82575");
+
+ if (hw->mac.type == e1000_82575) {
+ DEBUGOUT("Running reset init script for 82575\n");
+ /* SerDes configuration via SERDESCTRL */
+ e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCTL, 0x00, 0x0C);
+ e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCTL, 0x01, 0x78);
+ e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCTL, 0x1B, 0x23);
+ e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCTL, 0x23, 0x15);
+
+ /* CCM configuration via CCMCTL register */
+ e1000_write_8bit_ctrl_reg_generic(hw, E1000_CCMCTL, 0x14, 0x00);
+ e1000_write_8bit_ctrl_reg_generic(hw, E1000_CCMCTL, 0x10, 0x00);
+
+ /* PCIe lanes configuration */
+ e1000_write_8bit_ctrl_reg_generic(hw, E1000_GIOCTL, 0x00, 0xEC);
+ e1000_write_8bit_ctrl_reg_generic(hw, E1000_GIOCTL, 0x61, 0xDF);
+ e1000_write_8bit_ctrl_reg_generic(hw, E1000_GIOCTL, 0x34, 0x05);
+ e1000_write_8bit_ctrl_reg_generic(hw, E1000_GIOCTL, 0x2F, 0x81);
+
+ /* PCIe PLL Configuration */
+ e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCCTL, 0x02, 0x47);
+ e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCCTL, 0x14, 0x00);
+ e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCCTL, 0x10, 0x00);
+ }
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_read_mac_addr_82575 - Read device MAC address
+ * @hw: pointer to the HW structure
+ **/
+static s32 e1000_read_mac_addr_82575(struct e1000_hw *hw)
+{
+ s32 ret_val = E1000_SUCCESS;
+
+ DEBUGFUNC("e1000_read_mac_addr_82575");
+
+ /*
+ * If there's an alternate MAC address place it in RAR0
+ * so that it will override the Si installed default perm
+ * address.
+ */
+ ret_val = e1000_check_alt_mac_addr_generic(hw);
+ if (ret_val)
+ goto out;
+
+ ret_val = e1000_read_mac_addr_generic(hw);
+
+out:
+ return ret_val;
+}
+
+/**
+ * e1000_config_collision_dist_82575 - Configure collision distance
+ * @hw: pointer to the HW structure
+ *
+ * Configures the collision distance to the default value and is used
+ * during link setup.
+ **/
+static void e1000_config_collision_dist_82575(struct e1000_hw *hw)
+{
+ u32 tctl_ext;
+
+ DEBUGFUNC("e1000_config_collision_dist_82575");
+
+ tctl_ext = E1000_READ_REG(hw, E1000_TCTL_EXT);
+
+ tctl_ext &= ~E1000_TCTL_EXT_COLD;
+ tctl_ext |= E1000_COLLISION_DISTANCE << E1000_TCTL_EXT_COLD_SHIFT;
+
+ E1000_WRITE_REG(hw, E1000_TCTL_EXT, tctl_ext);
+ E1000_WRITE_FLUSH(hw);
+}
+
+/**
+ * e1000_power_down_phy_copper_82575 - Remove link during PHY power down
+ * @hw: pointer to the HW structure
+ *
+ * In the case of a PHY power down to save power, or to turn off link during a
+ * driver unload, or wake on lan is not enabled, remove the link.
+ **/
+static void e1000_power_down_phy_copper_82575(struct e1000_hw *hw)
+{
+ struct e1000_phy_info *phy = &hw->phy;
+
+ if (!(phy->ops.check_reset_block))
+ return;
+
+ /* If the management interface is not enabled, then power down */
+ if (!(e1000_enable_mng_pass_thru(hw) || phy->ops.check_reset_block(hw)))
+ e1000_power_down_phy_copper(hw);
+
+ return;
+}
+
+/**
+ * e1000_clear_hw_cntrs_82575 - Clear device specific hardware counters
+ * @hw: pointer to the HW structure
+ *
+ * Clears the hardware counters by reading the counter registers.
+ **/
+static void e1000_clear_hw_cntrs_82575(struct e1000_hw *hw)
+{
+ DEBUGFUNC("e1000_clear_hw_cntrs_82575");
+
+ e1000_clear_hw_cntrs_base_generic(hw);
+
+ E1000_READ_REG(hw, E1000_PRC64);
+ E1000_READ_REG(hw, E1000_PRC127);
+ E1000_READ_REG(hw, E1000_PRC255);
+ E1000_READ_REG(hw, E1000_PRC511);
+ E1000_READ_REG(hw, E1000_PRC1023);
+ E1000_READ_REG(hw, E1000_PRC1522);
+ E1000_READ_REG(hw, E1000_PTC64);
+ E1000_READ_REG(hw, E1000_PTC127);
+ E1000_READ_REG(hw, E1000_PTC255);
+ E1000_READ_REG(hw, E1000_PTC511);
+ E1000_READ_REG(hw, E1000_PTC1023);
+ E1000_READ_REG(hw, E1000_PTC1522);
+
+ E1000_READ_REG(hw, E1000_ALGNERRC);
+ E1000_READ_REG(hw, E1000_RXERRC);
+ E1000_READ_REG(hw, E1000_TNCRS);
+ E1000_READ_REG(hw, E1000_CEXTERR);
+ E1000_READ_REG(hw, E1000_TSCTC);
+ E1000_READ_REG(hw, E1000_TSCTFC);
+
+ E1000_READ_REG(hw, E1000_MGTPRC);
+ E1000_READ_REG(hw, E1000_MGTPDC);
+ E1000_READ_REG(hw, E1000_MGTPTC);
+
+ E1000_READ_REG(hw, E1000_IAC);
+ E1000_READ_REG(hw, E1000_ICRXOC);
+
+ E1000_READ_REG(hw, E1000_ICRXPTC);
+ E1000_READ_REG(hw, E1000_ICRXATC);
+ E1000_READ_REG(hw, E1000_ICTXPTC);
+ E1000_READ_REG(hw, E1000_ICTXATC);
+ E1000_READ_REG(hw, E1000_ICTXQEC);
+ E1000_READ_REG(hw, E1000_ICTXQMTC);
+ E1000_READ_REG(hw, E1000_ICRXDMTC);
+
+ E1000_READ_REG(hw, E1000_CBTMPC);
+ E1000_READ_REG(hw, E1000_HTDPMC);
+ E1000_READ_REG(hw, E1000_CBRMPC);
+ E1000_READ_REG(hw, E1000_RPTHC);
+ E1000_READ_REG(hw, E1000_HGPTC);
+ E1000_READ_REG(hw, E1000_HTCBDPC);
+ E1000_READ_REG(hw, E1000_HGORCL);
+ E1000_READ_REG(hw, E1000_HGORCH);
+ E1000_READ_REG(hw, E1000_HGOTCL);
+ E1000_READ_REG(hw, E1000_HGOTCH);
+ E1000_READ_REG(hw, E1000_LENERRS);
+
+ /* This register should not be read in copper configurations */
+ if ((hw->phy.media_type == e1000_media_type_internal_serdes) ||
+ e1000_sgmii_active_82575(hw))
+ E1000_READ_REG(hw, E1000_SCVPC);
+}
+
+/**
+ * e1000_rx_fifo_flush_82575 - Clean rx fifo after Rx enable
+ * @hw: pointer to the HW structure
+ *
+ * After rx enable if managability is enabled then there is likely some
+ * bad data at the start of the fifo and possibly in the DMA fifo. This
+ * function clears the fifos and flushes any packets that came in as rx was
+ * being enabled.
+ **/
+void e1000_rx_fifo_flush_82575(struct e1000_hw *hw)
+{
+ u32 rctl, rlpml, rxdctl[4], rfctl, temp_rctl, rx_enabled;
+ int i, ms_wait;
+
+ DEBUGFUNC("e1000_rx_fifo_workaround_82575");
+ if (hw->mac.type != e1000_82575 ||
+ !(E1000_READ_REG(hw, E1000_MANC) & E1000_MANC_RCV_TCO_EN))
+ return;
+
+ /* Disable all Rx queues */
+ for (i = 0; i < 4; i++) {
+ rxdctl[i] = E1000_READ_REG(hw, E1000_RXDCTL(i));
+ E1000_WRITE_REG(hw, E1000_RXDCTL(i),
+ rxdctl[i] & ~E1000_RXDCTL_QUEUE_ENABLE);
+ }
+ /* Poll all queues to verify they have shut down */
+ for (ms_wait = 0; ms_wait < 10; ms_wait++) {
+ msec_delay(1);
+ rx_enabled = 0;
+ for (i = 0; i < 4; i++)
+ rx_enabled |= E1000_READ_REG(hw, E1000_RXDCTL(i));
+ if (!(rx_enabled & E1000_RXDCTL_QUEUE_ENABLE))
+ break;
+ }
+
+ if (ms_wait == 10)
+ DEBUGOUT("Queue disable timed out after 10ms\n");
+
+ /* Clear RLPML, RCTL.SBP, RFCTL.LEF, and set RCTL.LPE so that all
+ * incoming packets are rejected. Set enable and wait 2ms so that
+ * any packet that was coming in as RCTL.EN was set is flushed
+ */
+ rfctl = E1000_READ_REG(hw, E1000_RFCTL);
+ E1000_WRITE_REG(hw, E1000_RFCTL, rfctl & ~E1000_RFCTL_LEF);
+
+ rlpml = E1000_READ_REG(hw, E1000_RLPML);
+ E1000_WRITE_REG(hw, E1000_RLPML, 0);
+
+ rctl = E1000_READ_REG(hw, E1000_RCTL);
+ temp_rctl = rctl & ~(E1000_RCTL_EN | E1000_RCTL_SBP);
+ temp_rctl |= E1000_RCTL_LPE;
+
+ E1000_WRITE_REG(hw, E1000_RCTL, temp_rctl);
+ E1000_WRITE_REG(hw, E1000_RCTL, temp_rctl | E1000_RCTL_EN);
+ E1000_WRITE_FLUSH(hw);
+ msec_delay(2);
+
+ /* Enable Rx queues that were previously enabled and restore our
+ * previous state
+ */
+ for (i = 0; i < 4; i++)
+ E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl[i]);
+ E1000_WRITE_REG(hw, E1000_RCTL, rctl);
+ E1000_WRITE_FLUSH(hw);
+
+ E1000_WRITE_REG(hw, E1000_RLPML, rlpml);
+ E1000_WRITE_REG(hw, E1000_RFCTL, rfctl);
+
+ /* Flush receive errors generated by workaround */
+ E1000_READ_REG(hw, E1000_ROC);
+ E1000_READ_REG(hw, E1000_RNBC);
+ E1000_READ_REG(hw, E1000_MPC);
+}
+
+/**
+ * e1000_set_pcie_completion_timeout - set pci-e completion timeout
+ * @hw: pointer to the HW structure
+ *
+ * The defaults for 82575 and 82576 should be in the range of 50us to 50ms,
+ * however the hardware default for these parts is 500us to 1ms which is less
+ * than the 10ms recommended by the pci-e spec. To address this we need to
+ * increase the value to either 10ms to 200ms for capability version 1 config,
+ * or 16ms to 55ms for version 2.
+ **/
+static s32 e1000_set_pcie_completion_timeout(struct e1000_hw *hw)
+{
+ u32 gcr = E1000_READ_REG(hw, E1000_GCR);
+ s32 ret_val = E1000_SUCCESS;
+ u16 pcie_devctl2;
+
+ /* only take action if timeout value is defaulted to 0 */
+ if (gcr & E1000_GCR_CMPL_TMOUT_MASK)
+ goto out;
+
+ /*
+ * if capababilities version is type 1 we can write the
+ * timeout of 10ms to 200ms through the GCR register
+ */
+ if (!(gcr & E1000_GCR_CAP_VER2)) {
+ gcr |= E1000_GCR_CMPL_TMOUT_10ms;
+ goto out;
+ }
+
+ /*
+ * for version 2 capabilities we need to write the config space
+ * directly in order to set the completion timeout value for
+ * 16ms to 55ms
+ */
+ ret_val = e1000_read_pcie_cap_reg(hw, PCIE_DEVICE_CONTROL2,
+ &pcie_devctl2);
+ if (ret_val)
+ goto out;
+
+ pcie_devctl2 |= PCIE_DEVICE_CONTROL2_16ms;
+
+ ret_val = e1000_write_pcie_cap_reg(hw, PCIE_DEVICE_CONTROL2,
+ &pcie_devctl2);
+out:
+ /* disable completion timeout resend */
+ gcr &= ~E1000_GCR_CMPL_TMOUT_RESEND;
+
+ E1000_WRITE_REG(hw, E1000_GCR, gcr);
+ return ret_val;
+}
+
+/**
+ * e1000_vmdq_set_anti_spoofing_pf - enable or disable anti-spoofing
+ * @hw: pointer to the hardware struct
+ * @enable: state to enter, either enabled or disabled
+ * @pf: Physical Function pool - do not set anti-spoofing for the PF
+ *
+ * enables/disables L2 switch anti-spoofing functionality.
+ **/
+void e1000_vmdq_set_anti_spoofing_pf(struct e1000_hw *hw, bool enable, int pf)
+{
+ u32 reg_val, reg_offset;
+
+ switch (hw->mac.type) {
+ case e1000_82576:
+ reg_offset = E1000_DTXSWC;
+ break;
+ case e1000_i350:
+ case e1000_i354:
+ reg_offset = E1000_TXSWC;
+ break;
+ default:
+ return;
+ }
+
+ reg_val = E1000_READ_REG(hw, reg_offset);
+ if (enable) {
+ reg_val |= (E1000_DTXSWC_MAC_SPOOF_MASK |
+ E1000_DTXSWC_VLAN_SPOOF_MASK);
+ /* The PF can spoof - it has to in order to
+ * support emulation mode NICs
+ */
+ reg_val ^= (1 << pf | 1 << (pf + MAX_NUM_VFS));
+ } else {
+ reg_val &= ~(E1000_DTXSWC_MAC_SPOOF_MASK |
+ E1000_DTXSWC_VLAN_SPOOF_MASK);
+ }
+ E1000_WRITE_REG(hw, reg_offset, reg_val);
+}
+
+/**
+ * e1000_vmdq_set_loopback_pf - enable or disable vmdq loopback
+ * @hw: pointer to the hardware struct
+ * @enable: state to enter, either enabled or disabled
+ *
+ * enables/disables L2 switch loopback functionality.
+ **/
+void e1000_vmdq_set_loopback_pf(struct e1000_hw *hw, bool enable)
+{
+ u32 dtxswc;
+
+ switch (hw->mac.type) {
+ case e1000_82576:
+ dtxswc = E1000_READ_REG(hw, E1000_DTXSWC);
+ if (enable)
+ dtxswc |= E1000_DTXSWC_VMDQ_LOOPBACK_EN;
+ else
+ dtxswc &= ~E1000_DTXSWC_VMDQ_LOOPBACK_EN;
+ E1000_WRITE_REG(hw, E1000_DTXSWC, dtxswc);
+ break;
+ case e1000_i350:
+ case e1000_i354:
+ dtxswc = E1000_READ_REG(hw, E1000_TXSWC);
+ if (enable)
+ dtxswc |= E1000_DTXSWC_VMDQ_LOOPBACK_EN;
+ else
+ dtxswc &= ~E1000_DTXSWC_VMDQ_LOOPBACK_EN;
+ E1000_WRITE_REG(hw, E1000_TXSWC, dtxswc);
+ break;
+ default:
+ /* Currently no other hardware supports loopback */
+ break;
+ }
+
+
+}
+
+/**
+ * e1000_vmdq_set_replication_pf - enable or disable vmdq replication
+ * @hw: pointer to the hardware struct
+ * @enable: state to enter, either enabled or disabled
+ *
+ * enables/disables replication of packets across multiple pools.
+ **/
+void e1000_vmdq_set_replication_pf(struct e1000_hw *hw, bool enable)
+{
+ u32 vt_ctl = E1000_READ_REG(hw, E1000_VT_CTL);
+
+ if (enable)
+ vt_ctl |= E1000_VT_CTL_VM_REPL_EN;
+ else
+ vt_ctl &= ~E1000_VT_CTL_VM_REPL_EN;
+
+ E1000_WRITE_REG(hw, E1000_VT_CTL, vt_ctl);
+}
+
+/**
+ * e1000_read_phy_reg_82580 - Read 82580 MDI control register
+ * @hw: pointer to the HW structure
+ * @offset: register offset to be read
+ * @data: pointer to the read data
+ *
+ * Reads the MDI control register in the PHY at offset and stores the
+ * information read to data.
+ **/
+static s32 e1000_read_phy_reg_82580(struct e1000_hw *hw, u32 offset, u16 *data)
+{
+ s32 ret_val;
+
+ DEBUGFUNC("e1000_read_phy_reg_82580");
+
+ ret_val = hw->phy.ops.acquire(hw);
+ if (ret_val)
+ goto out;
+
+ ret_val = e1000_read_phy_reg_mdic(hw, offset, data);
+
+ hw->phy.ops.release(hw);
+
+out:
+ return ret_val;
+}
+
+/**
+ * e1000_write_phy_reg_82580 - Write 82580 MDI control register
+ * @hw: pointer to the HW structure
+ * @offset: register offset to write to
+ * @data: data to write to register at offset
+ *
+ * Writes data to MDI control register in the PHY at offset.
+ **/
+static s32 e1000_write_phy_reg_82580(struct e1000_hw *hw, u32 offset, u16 data)
+{
+ s32 ret_val;
+
+ DEBUGFUNC("e1000_write_phy_reg_82580");
+
+ ret_val = hw->phy.ops.acquire(hw);
+ if (ret_val)
+ goto out;
+
+ ret_val = e1000_write_phy_reg_mdic(hw, offset, data);
+
+ hw->phy.ops.release(hw);
+
+out:
+ return ret_val;
+}
+
+/**
+ * e1000_reset_mdicnfg_82580 - Reset MDICNFG destination and com_mdio bits
+ * @hw: pointer to the HW structure
+ *
+ * This resets the the MDICNFG.Destination and MDICNFG.Com_MDIO bits based on
+ * the values found in the EEPROM. This addresses an issue in which these
+ * bits are not restored from EEPROM after reset.
+ **/
+static s32 e1000_reset_mdicnfg_82580(struct e1000_hw *hw)
+{
+ s32 ret_val = E1000_SUCCESS;
+ u32 mdicnfg;
+ u16 nvm_data = 0;
+
+ DEBUGFUNC("e1000_reset_mdicnfg_82580");
+
+ if (hw->mac.type != e1000_82580)
+ goto out;
+ if (!e1000_sgmii_active_82575(hw))
+ goto out;
+
+ ret_val = hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
+ NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
+ &nvm_data);
+ if (ret_val) {
+ DEBUGOUT("NVM Read Error\n");
+ goto out;
+ }
+
+ mdicnfg = E1000_READ_REG(hw, E1000_MDICNFG);
+ if (nvm_data & NVM_WORD24_EXT_MDIO)
+ mdicnfg |= E1000_MDICNFG_EXT_MDIO;
+ if (nvm_data & NVM_WORD24_COM_MDIO)
+ mdicnfg |= E1000_MDICNFG_COM_MDIO;
+ E1000_WRITE_REG(hw, E1000_MDICNFG, mdicnfg);
+out:
+ return ret_val;
+}
+
+/**
+ * e1000_reset_hw_82580 - Reset hardware
+ * @hw: pointer to the HW structure
+ *
+ * This resets function or entire device (all ports, etc.)
+ * to a known state.
+ **/
+static s32 e1000_reset_hw_82580(struct e1000_hw *hw)
+{
+ s32 ret_val = E1000_SUCCESS;
+ /* BH SW mailbox bit in SW_FW_SYNC */
+ u16 swmbsw_mask = E1000_SW_SYNCH_MB;
+ u32 ctrl;
+ bool global_device_reset = hw->dev_spec._82575.global_device_reset;
+
+ DEBUGFUNC("e1000_reset_hw_82580");
+
+ hw->dev_spec._82575.global_device_reset = false;
+
+ /* 82580 does not reliably do global_device_reset due to hw errata */
+ if (hw->mac.type == e1000_82580)
+ global_device_reset = false;
+
+ /* Get current control state. */
+ ctrl = E1000_READ_REG(hw, E1000_CTRL);
+
+ /*
+ * Prevent the PCI-E bus from sticking if there is no TLP connection
+ * on the last TLP read/write transaction when MAC is reset.
+ */
+ ret_val = e1000_disable_pcie_master_generic(hw);
+ if (ret_val)
+ DEBUGOUT("PCI-E Master disable polling has failed.\n");
+
+ DEBUGOUT("Masking off all interrupts\n");
+ E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff);
+ E1000_WRITE_REG(hw, E1000_RCTL, 0);
+ E1000_WRITE_REG(hw, E1000_TCTL, E1000_TCTL_PSP);
+ E1000_WRITE_FLUSH(hw);
+
+ msec_delay(10);
+
+ /* Determine whether or not a global dev reset is requested */
+ if (global_device_reset && hw->mac.ops.acquire_swfw_sync(hw,
+ swmbsw_mask))
+ global_device_reset = false;
+
+ if (global_device_reset && !(E1000_READ_REG(hw, E1000_STATUS) &
+ E1000_STAT_DEV_RST_SET))
+ ctrl |= E1000_CTRL_DEV_RST;
+ else
+ ctrl |= E1000_CTRL_RST;
+
+ E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
+ E1000_WRITE_FLUSH(hw);
+
+ /* Add delay to insure DEV_RST has time to complete */
+ if (global_device_reset)
+ msec_delay(5);
+
+ ret_val = e1000_get_auto_rd_done_generic(hw);
+ if (ret_val) {
+ /*
+ * When auto config read does not complete, do not
+ * return with an error. This can happen in situations
+ * where there is no eeprom and prevents getting link.
+ */
+ DEBUGOUT("Auto Read Done did not complete\n");
+ }
+
+ /* clear global device reset status bit */
+ E1000_WRITE_REG(hw, E1000_STATUS, E1000_STAT_DEV_RST_SET);
+
+ /* Clear any pending interrupt events. */
+ E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff);
+ E1000_READ_REG(hw, E1000_ICR);
+
+ ret_val = e1000_reset_mdicnfg_82580(hw);
+ if (ret_val)
+ DEBUGOUT("Could not reset MDICNFG based on EEPROM\n");
+
+ /* Install any alternate MAC address into RAR0 */
+ ret_val = e1000_check_alt_mac_addr_generic(hw);
+
+ /* Release semaphore */
+ if (global_device_reset)
+ hw->mac.ops.release_swfw_sync(hw, swmbsw_mask);
+
+ return ret_val;
+}
+
+/**
+ * e1000_rxpbs_adjust_82580 - adjust RXPBS value to reflect actual Rx PBA size
+ * @data: data received by reading RXPBS register
+ *
+ * The 82580 uses a table based approach for packet buffer allocation sizes.
+ * This function converts the retrieved value into the correct table value
+ * 0x0 0x1 0x2 0x3 0x4 0x5 0x6 0x7
+ * 0x0 36 72 144 1 2 4 8 16
+ * 0x8 35 70 140 rsv rsv rsv rsv rsv
+ */
+u16 e1000_rxpbs_adjust_82580(u32 data)
+{
+ u16 ret_val = 0;
+
+ if (data < E1000_82580_RXPBS_TABLE_SIZE)
+ ret_val = e1000_82580_rxpbs_table[data];
+
+ return ret_val;
+}
+
+/**
+ * e1000_validate_nvm_checksum_with_offset - Validate EEPROM
+ * checksum
+ * @hw: pointer to the HW structure
+ * @offset: offset in words of the checksum protected region
+ *
+ * Calculates the EEPROM checksum by reading/adding each word of the EEPROM
+ * and then verifies that the sum of the EEPROM is equal to 0xBABA.
+ **/
+s32 e1000_validate_nvm_checksum_with_offset(struct e1000_hw *hw, u16 offset)
+{
+ s32 ret_val = E1000_SUCCESS;
+ u16 checksum = 0;
+ u16 i, nvm_data;
+
+ DEBUGFUNC("e1000_validate_nvm_checksum_with_offset");
+
+ for (i = offset; i < ((NVM_CHECKSUM_REG + offset) + 1); i++) {
+ ret_val = hw->nvm.ops.read(hw, i, 1, &nvm_data);
+ if (ret_val) {
+ DEBUGOUT("NVM Read Error\n");
+ goto out;
+ }
+ checksum += nvm_data;
+ }
+
+ if (checksum != (u16) NVM_SUM) {
+ DEBUGOUT("NVM Checksum Invalid\n");
+ ret_val = -E1000_ERR_NVM;
+ goto out;
+ }
+
+out:
+ return ret_val;
+}
+
+/**
+ * e1000_update_nvm_checksum_with_offset - Update EEPROM
+ * checksum
+ * @hw: pointer to the HW structure
+ * @offset: offset in words of the checksum protected region
+ *
+ * Updates the EEPROM checksum by reading/adding each word of the EEPROM
+ * up to the checksum. Then calculates the EEPROM checksum and writes the
+ * value to the EEPROM.
+ **/
+s32 e1000_update_nvm_checksum_with_offset(struct e1000_hw *hw, u16 offset)
+{
+ s32 ret_val;
+ u16 checksum = 0;
+ u16 i, nvm_data;
+
+ DEBUGFUNC("e1000_update_nvm_checksum_with_offset");
+
+ for (i = offset; i < (NVM_CHECKSUM_REG + offset); i++) {
+ ret_val = hw->nvm.ops.read(hw, i, 1, &nvm_data);
+ if (ret_val) {
+ DEBUGOUT("NVM Read Error while updating checksum.\n");
+ goto out;
+ }
+ checksum += nvm_data;
+ }
+ checksum = (u16) NVM_SUM - checksum;
+ ret_val = hw->nvm.ops.write(hw, (NVM_CHECKSUM_REG + offset), 1,
+ &checksum);
+ if (ret_val)
+ DEBUGOUT("NVM Write Error while updating checksum.\n");
+
+out:
+ return ret_val;
+}
+
+/**
+ * e1000_validate_nvm_checksum_82580 - Validate EEPROM checksum
+ * @hw: pointer to the HW structure
+ *
+ * Calculates the EEPROM section checksum by reading/adding each word of
+ * the EEPROM and then verifies that the sum of the EEPROM is
+ * equal to 0xBABA.
+ **/
+static s32 e1000_validate_nvm_checksum_82580(struct e1000_hw *hw)
+{
+ s32 ret_val = E1000_SUCCESS;
+ u16 eeprom_regions_count = 1;
+ u16 j, nvm_data;
+ u16 nvm_offset;
+
+ DEBUGFUNC("e1000_validate_nvm_checksum_82580");
+
+ ret_val = hw->nvm.ops.read(hw, NVM_COMPATIBILITY_REG_3, 1, &nvm_data);
+ if (ret_val) {
+ DEBUGOUT("NVM Read Error\n");
+ goto out;
+ }
+
+ if (nvm_data & NVM_COMPATIBILITY_BIT_MASK) {
+ /* if chekcsums compatibility bit is set validate checksums
+ * for all 4 ports. */
+ eeprom_regions_count = 4;
+ }
+
+ for (j = 0; j < eeprom_regions_count; j++) {
+ nvm_offset = NVM_82580_LAN_FUNC_OFFSET(j);
+ ret_val = e1000_validate_nvm_checksum_with_offset(hw,
+ nvm_offset);
+ if (ret_val != E1000_SUCCESS)
+ goto out;
+ }
+
+out:
+ return ret_val;
+}
+
+/**
+ * e1000_update_nvm_checksum_82580 - Update EEPROM checksum
+ * @hw: pointer to the HW structure
+ *
+ * Updates the EEPROM section checksums for all 4 ports by reading/adding
+ * each word of the EEPROM up to the checksum. Then calculates the EEPROM
+ * checksum and writes the value to the EEPROM.
+ **/
+static s32 e1000_update_nvm_checksum_82580(struct e1000_hw *hw)
+{
+ s32 ret_val;
+ u16 j, nvm_data;
+ u16 nvm_offset;
+
+ DEBUGFUNC("e1000_update_nvm_checksum_82580");
+
+ ret_val = hw->nvm.ops.read(hw, NVM_COMPATIBILITY_REG_3, 1, &nvm_data);
+ if (ret_val) {
+ DEBUGOUT("NVM Read Error while updating checksum compatibility bit.\n");
+ goto out;
+ }
+
+ if (!(nvm_data & NVM_COMPATIBILITY_BIT_MASK)) {
+ /* set compatibility bit to validate checksums appropriately */
+ nvm_data = nvm_data | NVM_COMPATIBILITY_BIT_MASK;
+ ret_val = hw->nvm.ops.write(hw, NVM_COMPATIBILITY_REG_3, 1,
+ &nvm_data);
+ if (ret_val) {
+ DEBUGOUT("NVM Write Error while updating checksum compatibility bit.\n");
+ goto out;
+ }
+ }
+
+ for (j = 0; j < 4; j++) {
+ nvm_offset = NVM_82580_LAN_FUNC_OFFSET(j);
+ ret_val = e1000_update_nvm_checksum_with_offset(hw, nvm_offset);
+ if (ret_val)
+ goto out;
+ }
+
+out:
+ return ret_val;
+}
+
+/**
+ * e1000_validate_nvm_checksum_i350 - Validate EEPROM checksum
+ * @hw: pointer to the HW structure
+ *
+ * Calculates the EEPROM section checksum by reading/adding each word of
+ * the EEPROM and then verifies that the sum of the EEPROM is
+ * equal to 0xBABA.
+ **/
+static s32 e1000_validate_nvm_checksum_i350(struct e1000_hw *hw)
+{
+ s32 ret_val = E1000_SUCCESS;
+ u16 j;
+ u16 nvm_offset;
+
+ DEBUGFUNC("e1000_validate_nvm_checksum_i350");
+
+ for (j = 0; j < 4; j++) {
+ nvm_offset = NVM_82580_LAN_FUNC_OFFSET(j);
+ ret_val = e1000_validate_nvm_checksum_with_offset(hw,
+ nvm_offset);
+ if (ret_val != E1000_SUCCESS)
+ goto out;
+ }
+
+out:
+ return ret_val;
+}
+
+/**
+ * e1000_update_nvm_checksum_i350 - Update EEPROM checksum
+ * @hw: pointer to the HW structure
+ *
+ * Updates the EEPROM section checksums for all 4 ports by reading/adding
+ * each word of the EEPROM up to the checksum. Then calculates the EEPROM
+ * checksum and writes the value to the EEPROM.
+ **/
+static s32 e1000_update_nvm_checksum_i350(struct e1000_hw *hw)
+{
+ s32 ret_val = E1000_SUCCESS;
+ u16 j;
+ u16 nvm_offset;
+
+ DEBUGFUNC("e1000_update_nvm_checksum_i350");
+
+ for (j = 0; j < 4; j++) {
+ nvm_offset = NVM_82580_LAN_FUNC_OFFSET(j);
+ ret_val = e1000_update_nvm_checksum_with_offset(hw, nvm_offset);
+ if (ret_val != E1000_SUCCESS)
+ goto out;
+ }
+
+out:
+ return ret_val;
+}
+
+/**
+ * __e1000_access_emi_reg - Read/write EMI register
+ * @hw: pointer to the HW structure
+ * @addr: EMI address to program
+ * @data: pointer to value to read/write from/to the EMI address
+ * @read: boolean flag to indicate read or write
+ **/
+static s32 __e1000_access_emi_reg(struct e1000_hw *hw, u16 address,
+ u16 *data, bool read)
+{
+ s32 ret_val = E1000_SUCCESS;
+
+ DEBUGFUNC("__e1000_access_emi_reg");
+
+ ret_val = hw->phy.ops.write_reg(hw, E1000_EMIADD, address);
+ if (ret_val)
+ return ret_val;
+
+ if (read)
+ ret_val = hw->phy.ops.read_reg(hw, E1000_EMIDATA, data);
+ else
+ ret_val = hw->phy.ops.write_reg(hw, E1000_EMIDATA, *data);
+
+ return ret_val;
+}
+
+/**
+ * e1000_read_emi_reg - Read Extended Management Interface register
+ * @hw: pointer to the HW structure
+ * @addr: EMI address to program
+ * @data: value to be read from the EMI address
+ **/
+s32 e1000_read_emi_reg(struct e1000_hw *hw, u16 addr, u16 *data)
+{
+ DEBUGFUNC("e1000_read_emi_reg");
+
+ return __e1000_access_emi_reg(hw, addr, data, true);
+}
+
+/**
+ * e1000_set_eee_i350 - Enable/disable EEE support
+ * @hw: pointer to the HW structure
+ *
+ * Enable/disable EEE based on setting in dev_spec structure.
+ *
+ **/
+s32 e1000_set_eee_i350(struct e1000_hw *hw)
+{
+ s32 ret_val = E1000_SUCCESS;
+ u32 ipcnfg, eeer;
+
+ DEBUGFUNC("e1000_set_eee_i350");
+
+ if ((hw->mac.type < e1000_i350) ||
+ (hw->phy.media_type != e1000_media_type_copper))
+ goto out;
+ ipcnfg = E1000_READ_REG(hw, E1000_IPCNFG);
+ eeer = E1000_READ_REG(hw, E1000_EEER);
+
+ /* enable or disable per user setting */
+ if (!(hw->dev_spec._82575.eee_disable)) {
+ u32 eee_su = E1000_READ_REG(hw, E1000_EEE_SU);
+
+ ipcnfg |= (E1000_IPCNFG_EEE_1G_AN | E1000_IPCNFG_EEE_100M_AN);
+ eeer |= (E1000_EEER_TX_LPI_EN | E1000_EEER_RX_LPI_EN |
+ E1000_EEER_LPI_FC);
+
+ /* This bit should not be set in normal operation. */
+ if (eee_su & E1000_EEE_SU_LPI_CLK_STP)
+ DEBUGOUT("LPI Clock Stop Bit should not be set!\n");
+ } else {
+ ipcnfg &= ~(E1000_IPCNFG_EEE_1G_AN | E1000_IPCNFG_EEE_100M_AN);
+ eeer &= ~(E1000_EEER_TX_LPI_EN | E1000_EEER_RX_LPI_EN |
+ E1000_EEER_LPI_FC);
+ }
+ E1000_WRITE_REG(hw, E1000_IPCNFG, ipcnfg);
+ E1000_WRITE_REG(hw, E1000_EEER, eeer);
+ E1000_READ_REG(hw, E1000_IPCNFG);
+ E1000_READ_REG(hw, E1000_EEER);
+out:
+
+ return ret_val;
+}
+
+/**
+ * e1000_set_eee_i354 - Enable/disable EEE support
+ * @hw: pointer to the HW structure
+ *
+ * Enable/disable EEE legacy mode based on setting in dev_spec structure.
+ *
+ **/
+s32 e1000_set_eee_i354(struct e1000_hw *hw)
+{
+ struct e1000_phy_info *phy = &hw->phy;
+ s32 ret_val = E1000_SUCCESS;
+ u16 phy_data;
+
+ DEBUGFUNC("e1000_set_eee_i354");
+
+ if ((hw->phy.media_type != e1000_media_type_copper) ||
+ ((phy->id != M88E1543_E_PHY_ID)))
+ goto out;
+
+ if (!hw->dev_spec._82575.eee_disable) {
+ /* Switch to PHY page 18. */
+ ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 18);
+ if (ret_val)
+ goto out;
+
+ ret_val = phy->ops.read_reg(hw, E1000_M88E1543_EEE_CTRL_1,
+ &phy_data);
+ if (ret_val)
+ goto out;
+
+ phy_data |= E1000_M88E1543_EEE_CTRL_1_MS;
+ ret_val = phy->ops.write_reg(hw, E1000_M88E1543_EEE_CTRL_1,
+ phy_data);
+ if (ret_val)
+ goto out;
+
+ /* Return the PHY to page 0. */
+ ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 0);
+ if (ret_val)
+ goto out;
+
+ /* Turn on EEE advertisement. */
+ ret_val = e1000_read_xmdio_reg(hw, E1000_EEE_ADV_ADDR_I354,
+ E1000_EEE_ADV_DEV_I354,
+ &phy_data);
+ if (ret_val)
+ goto out;
+
+ phy_data |= E1000_EEE_ADV_100_SUPPORTED |
+ E1000_EEE_ADV_1000_SUPPORTED;
+ ret_val = e1000_write_xmdio_reg(hw, E1000_EEE_ADV_ADDR_I354,
+ E1000_EEE_ADV_DEV_I354,
+ phy_data);
+ } else {
+ /* Turn off EEE advertisement. */
+ ret_val = e1000_read_xmdio_reg(hw, E1000_EEE_ADV_ADDR_I354,
+ E1000_EEE_ADV_DEV_I354,
+ &phy_data);
+ if (ret_val)
+ goto out;
+
+ phy_data &= ~(E1000_EEE_ADV_100_SUPPORTED |
+ E1000_EEE_ADV_1000_SUPPORTED);
+ ret_val = e1000_write_xmdio_reg(hw, E1000_EEE_ADV_ADDR_I354,
+ E1000_EEE_ADV_DEV_I354,
+ phy_data);
+ }
+
+out:
+ return ret_val;
+}
+
+/**
+ * e1000_get_eee_status_i354 - Get EEE status
+ * @hw: pointer to the HW structure
+ * @status: EEE status
+ *
+ * Get EEE status by guessing based on whether Tx or Rx LPI indications have
+ * been received.
+ **/
+s32 e1000_get_eee_status_i354(struct e1000_hw *hw, bool *status)
+{
+ struct e1000_phy_info *phy = &hw->phy;
+ s32 ret_val = E1000_SUCCESS;
+ u16 phy_data;
+
+ DEBUGFUNC("e1000_get_eee_status_i354");
+
+ /* Check if EEE is supported on this device. */
+ if ((hw->phy.media_type != e1000_media_type_copper) ||
+ ((phy->id != M88E1543_E_PHY_ID)))
+ goto out;
+
+ ret_val = e1000_read_xmdio_reg(hw, E1000_PCS_STATUS_ADDR_I354,
+ E1000_PCS_STATUS_DEV_I354,
+ &phy_data);
+ if (ret_val)
+ goto out;
+
+ *status = phy_data & (E1000_PCS_STATUS_TX_LPI_RCVD |
+ E1000_PCS_STATUS_RX_LPI_RCVD) ? true : false;
+
+out:
+ return ret_val;
+}
+
+/* Due to a hw errata, if the host tries to configure the VFTA register
+ * while performing queries from the BMC or DMA, then the VFTA in some
+ * cases won't be written.
+ */
+
+/**
+ * e1000_clear_vfta_i350 - Clear VLAN filter table
+ * @hw: pointer to the HW structure
+ *
+ * Clears the register array which contains the VLAN filter table by
+ * setting all the values to 0.
+ **/
+void e1000_clear_vfta_i350(struct e1000_hw *hw)
+{
+ u32 offset;
+ int i;
+
+ DEBUGFUNC("e1000_clear_vfta_350");
+
+ for (offset = 0; offset < E1000_VLAN_FILTER_TBL_SIZE; offset++) {
+ for (i = 0; i < 10; i++)
+ E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset, 0);
+
+ E1000_WRITE_FLUSH(hw);
+ }
+}
+
+/**
+ * e1000_write_vfta_i350 - Write value to VLAN filter table
+ * @hw: pointer to the HW structure
+ * @offset: register offset in VLAN filter table
+ * @value: register value written to VLAN filter table
+ *
+ * Writes value at the given offset in the register array which stores
+ * the VLAN filter table.
+ **/
+void e1000_write_vfta_i350(struct e1000_hw *hw, u32 offset, u32 value)
+{
+ int i;
+
+ DEBUGFUNC("e1000_write_vfta_350");
+
+ for (i = 0; i < 10; i++)
+ E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset, value);
+
+ E1000_WRITE_FLUSH(hw);
+}
+
+
+/**
+ * e1000_set_i2c_bb - Enable I2C bit-bang
+ * @hw: pointer to the HW structure
+ *
+ * Enable I2C bit-bang interface
+ *
+ **/
+s32 e1000_set_i2c_bb(struct e1000_hw *hw)
+{
+ s32 ret_val = E1000_SUCCESS;
+ u32 ctrl_ext, i2cparams;
+
+ DEBUGFUNC("e1000_set_i2c_bb");
+
+ ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
+ ctrl_ext |= E1000_CTRL_I2C_ENA;
+ E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext);
+ E1000_WRITE_FLUSH(hw);
+
+ i2cparams = E1000_READ_REG(hw, E1000_I2CPARAMS);
+ i2cparams |= E1000_I2CBB_EN;
+ i2cparams |= E1000_I2C_DATA_OE_N;
+ i2cparams |= E1000_I2C_CLK_OE_N;
+ E1000_WRITE_REG(hw, E1000_I2CPARAMS, i2cparams);
+ E1000_WRITE_FLUSH(hw);
+
+ return ret_val;
+}
+
+/**
+ * e1000_read_i2c_byte_generic - Reads 8 bit word over I2C
+ * @hw: pointer to hardware structure
+ * @byte_offset: byte offset to read
+ * @dev_addr: device address
+ * @data: value read
+ *
+ * Performs byte read operation over I2C interface at
+ * a specified device address.
+ **/
+s32 e1000_read_i2c_byte_generic(struct e1000_hw *hw, u8 byte_offset,
+ u8 dev_addr, u8 *data)
+{
+ s32 status = E1000_SUCCESS;
+ u32 max_retry = 10;
+ u32 retry = 1;
+ u16 swfw_mask = 0;
+
+ bool nack = true;
+
+ DEBUGFUNC("e1000_read_i2c_byte_generic");
+
+ swfw_mask = E1000_SWFW_PHY0_SM;
+
+ do {
+ if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask)
+ != E1000_SUCCESS) {
+ status = E1000_ERR_SWFW_SYNC;
+ goto read_byte_out;
+ }
+
+ e1000_i2c_start(hw);
+
+ /* Device Address and write indication */
+ status = e1000_clock_out_i2c_byte(hw, dev_addr);
+ if (status != E1000_SUCCESS)
+ goto fail;
+
+ status = e1000_get_i2c_ack(hw);
+ if (status != E1000_SUCCESS)
+ goto fail;
+
+ status = e1000_clock_out_i2c_byte(hw, byte_offset);
+ if (status != E1000_SUCCESS)
+ goto fail;
+
+ status = e1000_get_i2c_ack(hw);
+ if (status != E1000_SUCCESS)
+ goto fail;
+
+ e1000_i2c_start(hw);
+
+ /* Device Address and read indication */
+ status = e1000_clock_out_i2c_byte(hw, (dev_addr | 0x1));
+ if (status != E1000_SUCCESS)
+ goto fail;
+
+ status = e1000_get_i2c_ack(hw);
+ if (status != E1000_SUCCESS)
+ goto fail;
+
+ status = e1000_clock_in_i2c_byte(hw, data);
+ if (status != E1000_SUCCESS)
+ goto fail;
+
+ status = e1000_clock_out_i2c_bit(hw, nack);
+ if (status != E1000_SUCCESS)
+ goto fail;
+
+ e1000_i2c_stop(hw);
+ break;
+
+fail:
+ hw->mac.ops.release_swfw_sync(hw, swfw_mask);
+ msec_delay(100);
+ e1000_i2c_bus_clear(hw);
+ retry++;
+ if (retry < max_retry)
+ DEBUGOUT("I2C byte read error - Retrying.\n");
+ else
+ DEBUGOUT("I2C byte read error.\n");
+
+ } while (retry < max_retry);
+
+ hw->mac.ops.release_swfw_sync(hw, swfw_mask);
+
+read_byte_out:
+
+ return status;
+}
+
+/**
+ * e1000_write_i2c_byte_generic - Writes 8 bit word over I2C
+ * @hw: pointer to hardware structure
+ * @byte_offset: byte offset to write
+ * @dev_addr: device address
+ * @data: value to write
+ *
+ * Performs byte write operation over I2C interface at
+ * a specified device address.
+ **/
+s32 e1000_write_i2c_byte_generic(struct e1000_hw *hw, u8 byte_offset,
+ u8 dev_addr, u8 data)
+{
+ s32 status = E1000_SUCCESS;
+ u32 max_retry = 1;
+ u32 retry = 0;
+ u16 swfw_mask = 0;
+
+ DEBUGFUNC("e1000_write_i2c_byte_generic");
+
+ swfw_mask = E1000_SWFW_PHY0_SM;
+
+ if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask) != E1000_SUCCESS) {
+ status = E1000_ERR_SWFW_SYNC;
+ goto write_byte_out;
+ }
+
+ do {
+ e1000_i2c_start(hw);
+
+ status = e1000_clock_out_i2c_byte(hw, dev_addr);
+ if (status != E1000_SUCCESS)
+ goto fail;
+
+ status = e1000_get_i2c_ack(hw);
+ if (status != E1000_SUCCESS)
+ goto fail;
+
+ status = e1000_clock_out_i2c_byte(hw, byte_offset);
+ if (status != E1000_SUCCESS)
+ goto fail;
+
+ status = e1000_get_i2c_ack(hw);
+ if (status != E1000_SUCCESS)
+ goto fail;
+
+ status = e1000_clock_out_i2c_byte(hw, data);
+ if (status != E1000_SUCCESS)
+ goto fail;
+
+ status = e1000_get_i2c_ack(hw);
+ if (status != E1000_SUCCESS)
+ goto fail;
+
+ e1000_i2c_stop(hw);
+ break;
+
+fail:
+ e1000_i2c_bus_clear(hw);
+ retry++;
+ if (retry < max_retry)
+ DEBUGOUT("I2C byte write error - Retrying.\n");
+ else
+ DEBUGOUT("I2C byte write error.\n");
+ } while (retry < max_retry);
+
+ hw->mac.ops.release_swfw_sync(hw, swfw_mask);
+
+write_byte_out:
+
+ return status;
+}
+
+/**
+ * e1000_i2c_start - Sets I2C start condition
+ * @hw: pointer to hardware structure
+ *
+ * Sets I2C start condition (High -> Low on SDA while SCL is High)
+ **/
+static void e1000_i2c_start(struct e1000_hw *hw)
+{
+ u32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS);
+
+ DEBUGFUNC("e1000_i2c_start");
+
+ /* Start condition must begin with data and clock high */
+ e1000_set_i2c_data(hw, &i2cctl, 1);
+ e1000_raise_i2c_clk(hw, &i2cctl);
+
+ /* Setup time for start condition (4.7us) */
+ usec_delay(E1000_I2C_T_SU_STA);
+
+ e1000_set_i2c_data(hw, &i2cctl, 0);
+
+ /* Hold time for start condition (4us) */
+ usec_delay(E1000_I2C_T_HD_STA);
+
+ e1000_lower_i2c_clk(hw, &i2cctl);
+
+ /* Minimum low period of clock is 4.7 us */
+ usec_delay(E1000_I2C_T_LOW);
+
+}
+
+/**
+ * e1000_i2c_stop - Sets I2C stop condition
+ * @hw: pointer to hardware structure
+ *
+ * Sets I2C stop condition (Low -> High on SDA while SCL is High)
+ **/
+static void e1000_i2c_stop(struct e1000_hw *hw)
+{
+ u32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS);
+
+ DEBUGFUNC("e1000_i2c_stop");
+
+ /* Stop condition must begin with data low and clock high */
+ e1000_set_i2c_data(hw, &i2cctl, 0);
+ e1000_raise_i2c_clk(hw, &i2cctl);
+
+ /* Setup time for stop condition (4us) */
+ usec_delay(E1000_I2C_T_SU_STO);
+
+ e1000_set_i2c_data(hw, &i2cctl, 1);
+
+ /* bus free time between stop and start (4.7us)*/
+ usec_delay(E1000_I2C_T_BUF);
+}
+
+/**
+ * e1000_clock_in_i2c_byte - Clocks in one byte via I2C
+ * @hw: pointer to hardware structure
+ * @data: data byte to clock in
+ *
+ * Clocks in one byte data via I2C data/clock
+ **/
+static s32 e1000_clock_in_i2c_byte(struct e1000_hw *hw, u8 *data)
+{
+ s32 i;
+ bool bit = 0;
+
+ DEBUGFUNC("e1000_clock_in_i2c_byte");
+
+ *data = 0;
+ for (i = 7; i >= 0; i--) {
+ e1000_clock_in_i2c_bit(hw, &bit);
+ *data |= bit << i;
+ }
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_clock_out_i2c_byte - Clocks out one byte via I2C
+ * @hw: pointer to hardware structure
+ * @data: data byte clocked out
+ *
+ * Clocks out one byte data via I2C data/clock
+ **/
+static s32 e1000_clock_out_i2c_byte(struct e1000_hw *hw, u8 data)
+{
+ s32 status = E1000_SUCCESS;
+ s32 i;
+ u32 i2cctl;
+ bool bit = 0;
+
+ DEBUGFUNC("e1000_clock_out_i2c_byte");
+
+ for (i = 7; i >= 0; i--) {
+ bit = (data >> i) & 0x1;
+ status = e1000_clock_out_i2c_bit(hw, bit);
+
+ if (status != E1000_SUCCESS)
+ break;
+ }
+
+ /* Release SDA line (set high) */
+ i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS);
+
+ i2cctl |= E1000_I2C_DATA_OE_N;
+ E1000_WRITE_REG(hw, E1000_I2CPARAMS, i2cctl);
+ E1000_WRITE_FLUSH(hw);
+
+ return status;
+}
+
+/**
+ * e1000_get_i2c_ack - Polls for I2C ACK
+ * @hw: pointer to hardware structure
+ *
+ * Clocks in/out one bit via I2C data/clock
+ **/
+static s32 e1000_get_i2c_ack(struct e1000_hw *hw)
+{
+ s32 status = E1000_SUCCESS;
+ u32 i = 0;
+ u32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS);
+ u32 timeout = 10;
+ bool ack = true;
+
+ DEBUGFUNC("e1000_get_i2c_ack");
+
+ e1000_raise_i2c_clk(hw, &i2cctl);
+
+ /* Minimum high period of clock is 4us */
+ usec_delay(E1000_I2C_T_HIGH);
+
+ /* Wait until SCL returns high */
+ for (i = 0; i < timeout; i++) {
+ usec_delay(1);
+ i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS);
+ if (i2cctl & E1000_I2C_CLK_IN)
+ break;
+ }
+ if (!(i2cctl & E1000_I2C_CLK_IN))
+ return E1000_ERR_I2C;
+
+ ack = e1000_get_i2c_data(&i2cctl);
+ if (ack) {
+ DEBUGOUT("I2C ack was not received.\n");
+ status = E1000_ERR_I2C;
+ }
+
+ e1000_lower_i2c_clk(hw, &i2cctl);
+
+ /* Minimum low period of clock is 4.7 us */
+ usec_delay(E1000_I2C_T_LOW);
+
+ return status;
+}
+
+/**
+ * e1000_clock_in_i2c_bit - Clocks in one bit via I2C data/clock
+ * @hw: pointer to hardware structure
+ * @data: read data value
+ *
+ * Clocks in one bit via I2C data/clock
+ **/
+static s32 e1000_clock_in_i2c_bit(struct e1000_hw *hw, bool *data)
+{
+ u32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS);
+
+ DEBUGFUNC("e1000_clock_in_i2c_bit");
+
+ e1000_raise_i2c_clk(hw, &i2cctl);
+
+ /* Minimum high period of clock is 4us */
+ usec_delay(E1000_I2C_T_HIGH);
+
+ i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS);
+ *data = e1000_get_i2c_data(&i2cctl);
+
+ e1000_lower_i2c_clk(hw, &i2cctl);
+
+ /* Minimum low period of clock is 4.7 us */
+ usec_delay(E1000_I2C_T_LOW);
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_clock_out_i2c_bit - Clocks in/out one bit via I2C data/clock
+ * @hw: pointer to hardware structure
+ * @data: data value to write
+ *
+ * Clocks out one bit via I2C data/clock
+ **/
+static s32 e1000_clock_out_i2c_bit(struct e1000_hw *hw, bool data)
+{
+ s32 status;
+ u32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS);
+
+ DEBUGFUNC("e1000_clock_out_i2c_bit");
+
+ status = e1000_set_i2c_data(hw, &i2cctl, data);
+ if (status == E1000_SUCCESS) {
+ e1000_raise_i2c_clk(hw, &i2cctl);
+
+ /* Minimum high period of clock is 4us */
+ usec_delay(E1000_I2C_T_HIGH);
+
+ e1000_lower_i2c_clk(hw, &i2cctl);
+
+ /* Minimum low period of clock is 4.7 us.
+ * This also takes care of the data hold time.
+ */
+ usec_delay(E1000_I2C_T_LOW);
+ } else {
+ status = E1000_ERR_I2C;
+ DEBUGOUT1("I2C data was not set to %X\n", data);
+ }
+
+ return status;
+}
+/**
+ * e1000_raise_i2c_clk - Raises the I2C SCL clock
+ * @hw: pointer to hardware structure
+ * @i2cctl: Current value of I2CCTL register
+ *
+ * Raises the I2C clock line '0'->'1'
+ **/
+static void e1000_raise_i2c_clk(struct e1000_hw *hw, u32 *i2cctl)
+{
+ DEBUGFUNC("e1000_raise_i2c_clk");
+
+ *i2cctl |= E1000_I2C_CLK_OUT;
+ *i2cctl &= ~E1000_I2C_CLK_OE_N;
+ E1000_WRITE_REG(hw, E1000_I2CPARAMS, *i2cctl);
+ E1000_WRITE_FLUSH(hw);
+
+ /* SCL rise time (1000ns) */
+ usec_delay(E1000_I2C_T_RISE);
+}
+
+/**
+ * e1000_lower_i2c_clk - Lowers the I2C SCL clock
+ * @hw: pointer to hardware structure
+ * @i2cctl: Current value of I2CCTL register
+ *
+ * Lowers the I2C clock line '1'->'0'
+ **/
+static void e1000_lower_i2c_clk(struct e1000_hw *hw, u32 *i2cctl)
+{
+
+ DEBUGFUNC("e1000_lower_i2c_clk");
+
+ *i2cctl &= ~E1000_I2C_CLK_OUT;
+ *i2cctl &= ~E1000_I2C_CLK_OE_N;
+ E1000_WRITE_REG(hw, E1000_I2CPARAMS, *i2cctl);
+ E1000_WRITE_FLUSH(hw);
+
+ /* SCL fall time (300ns) */
+ usec_delay(E1000_I2C_T_FALL);
+}
+
+/**
+ * e1000_set_i2c_data - Sets the I2C data bit
+ * @hw: pointer to hardware structure
+ * @i2cctl: Current value of I2CCTL register
+ * @data: I2C data value (0 or 1) to set
+ *
+ * Sets the I2C data bit
+ **/
+static s32 e1000_set_i2c_data(struct e1000_hw *hw, u32 *i2cctl, bool data)
+{
+ s32 status = E1000_SUCCESS;
+
+ DEBUGFUNC("e1000_set_i2c_data");
+
+ if (data)
+ *i2cctl |= E1000_I2C_DATA_OUT;
+ else
+ *i2cctl &= ~E1000_I2C_DATA_OUT;
+
+ *i2cctl &= ~E1000_I2C_DATA_OE_N;
+ *i2cctl |= E1000_I2C_CLK_OE_N;
+ E1000_WRITE_REG(hw, E1000_I2CPARAMS, *i2cctl);
+ E1000_WRITE_FLUSH(hw);
+
+ /* Data rise/fall (1000ns/300ns) and set-up time (250ns) */
+ usec_delay(E1000_I2C_T_RISE + E1000_I2C_T_FALL + E1000_I2C_T_SU_DATA);
+
+ *i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS);
+ if (data != e1000_get_i2c_data(i2cctl)) {
+ status = E1000_ERR_I2C;
+ DEBUGOUT1("Error - I2C data was not set to %X.\n", data);
+ }
+
+ return status;
+}
+
+/**
+ * e1000_get_i2c_data - Reads the I2C SDA data bit
+ * @hw: pointer to hardware structure
+ * @i2cctl: Current value of I2CCTL register
+ *
+ * Returns the I2C data bit value
+ **/
+static bool e1000_get_i2c_data(u32 *i2cctl)
+{
+ bool data;
+
+ DEBUGFUNC("e1000_get_i2c_data");
+
+ if (*i2cctl & E1000_I2C_DATA_IN)
+ data = 1;
+ else
+ data = 0;
+
+ return data;
+}
+
+/**
+ * e1000_i2c_bus_clear - Clears the I2C bus
+ * @hw: pointer to hardware structure
+ *
+ * Clears the I2C bus by sending nine clock pulses.
+ * Used when data line is stuck low.
+ **/
+void e1000_i2c_bus_clear(struct e1000_hw *hw)
+{
+ u32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS);
+ u32 i;
+
+ DEBUGFUNC("e1000_i2c_bus_clear");
+
+ e1000_i2c_start(hw);
+
+ e1000_set_i2c_data(hw, &i2cctl, 1);
+
+ for (i = 0; i < 9; i++) {
+ e1000_raise_i2c_clk(hw, &i2cctl);
+
+ /* Min high period of clock is 4us */
+ usec_delay(E1000_I2C_T_HIGH);
+
+ e1000_lower_i2c_clk(hw, &i2cctl);
+
+ /* Min low period of clock is 4.7us*/
+ usec_delay(E1000_I2C_T_LOW);
+ }
+
+ e1000_i2c_start(hw);
+
+ /* Put the i2c bus back to default state */
+ e1000_i2c_stop(hw);
+}
+
+static const u8 e1000_emc_temp_data[4] = {
+ E1000_EMC_INTERNAL_DATA,
+ E1000_EMC_DIODE1_DATA,
+ E1000_EMC_DIODE2_DATA,
+ E1000_EMC_DIODE3_DATA
+};
+static const u8 e1000_emc_therm_limit[4] = {
+ E1000_EMC_INTERNAL_THERM_LIMIT,
+ E1000_EMC_DIODE1_THERM_LIMIT,
+ E1000_EMC_DIODE2_THERM_LIMIT,
+ E1000_EMC_DIODE3_THERM_LIMIT
+};
+
+/**
+ * e1000_get_thermal_sensor_data_generic - Gathers thermal sensor data
+ * @hw: pointer to hardware structure
+ *
+ * Updates the temperatures in mac.thermal_sensor_data
+ **/
+s32 e1000_get_thermal_sensor_data_generic(struct e1000_hw *hw)
+{
+ s32 status = E1000_SUCCESS;
+ u16 ets_offset;
+ u16 ets_cfg;
+ u16 ets_sensor;
+ u8 num_sensors;
+ u8 sensor_index;
+ u8 sensor_location;
+ u8 i;
+ struct e1000_thermal_sensor_data *data = &hw->mac.thermal_sensor_data;
+
+ DEBUGFUNC("e1000_get_thermal_sensor_data_generic");
+
+ if ((hw->mac.type != e1000_i350) || (hw->bus.func != 0))
+ return E1000_NOT_IMPLEMENTED;
+
+ data->sensor[0].temp = (E1000_READ_REG(hw, E1000_THMJT) & 0xFF);
+
+ /* Return the internal sensor only if ETS is unsupported */
+ e1000_read_nvm(hw, NVM_ETS_CFG, 1, &ets_offset);
+ if ((ets_offset == 0x0000) || (ets_offset == 0xFFFF))
+ return status;
+
+ e1000_read_nvm(hw, ets_offset, 1, &ets_cfg);
+ if (((ets_cfg & NVM_ETS_TYPE_MASK) >> NVM_ETS_TYPE_SHIFT)
+ != NVM_ETS_TYPE_EMC)
+ return E1000_NOT_IMPLEMENTED;
+
+ num_sensors = (ets_cfg & NVM_ETS_NUM_SENSORS_MASK);
+ if (num_sensors > E1000_MAX_SENSORS)
+ num_sensors = E1000_MAX_SENSORS;
+
+ for (i = 1; i < num_sensors; i++) {
+ e1000_read_nvm(hw, (ets_offset + i), 1, &ets_sensor);
+ sensor_index = ((ets_sensor & NVM_ETS_DATA_INDEX_MASK) >>
+ NVM_ETS_DATA_INDEX_SHIFT);
+ sensor_location = ((ets_sensor & NVM_ETS_DATA_LOC_MASK) >>
+ NVM_ETS_DATA_LOC_SHIFT);
+
+ if (sensor_location != 0)
+ hw->phy.ops.read_i2c_byte(hw,
+ e1000_emc_temp_data[sensor_index],
+ E1000_I2C_THERMAL_SENSOR_ADDR,
+ &data->sensor[i].temp);
+ }
+ return status;
+}
+
+/**
+ * e1000_init_thermal_sensor_thresh_generic - Sets thermal sensor thresholds
+ * @hw: pointer to hardware structure
+ *
+ * Sets the thermal sensor thresholds according to the NVM map
+ * and save off the threshold and location values into mac.thermal_sensor_data
+ **/
+s32 e1000_init_thermal_sensor_thresh_generic(struct e1000_hw *hw)
+{
+ s32 status = E1000_SUCCESS;
+ u16 ets_offset;
+ u16 ets_cfg;
+ u16 ets_sensor;
+ u8 low_thresh_delta;
+ u8 num_sensors;
+ u8 sensor_index;
+ u8 sensor_location;
+ u8 therm_limit;
+ u8 i;
+ struct e1000_thermal_sensor_data *data = &hw->mac.thermal_sensor_data;
+
+ DEBUGFUNC("e1000_init_thermal_sensor_thresh_generic");
+
+ if ((hw->mac.type != e1000_i350) || (hw->bus.func != 0))
+ return E1000_NOT_IMPLEMENTED;
+
+ memset(data, 0, sizeof(struct e1000_thermal_sensor_data));
+
+ data->sensor[0].location = 0x1;
+ data->sensor[0].caution_thresh =
+ (E1000_READ_REG(hw, E1000_THHIGHTC) & 0xFF);
+ data->sensor[0].max_op_thresh =
+ (E1000_READ_REG(hw, E1000_THLOWTC) & 0xFF);
+
+ /* Return the internal sensor only if ETS is unsupported */
+ e1000_read_nvm(hw, NVM_ETS_CFG, 1, &ets_offset);
+ if ((ets_offset == 0x0000) || (ets_offset == 0xFFFF))
+ return status;
+
+ e1000_read_nvm(hw, ets_offset, 1, &ets_cfg);
+ if (((ets_cfg & NVM_ETS_TYPE_MASK) >> NVM_ETS_TYPE_SHIFT)
+ != NVM_ETS_TYPE_EMC)
+ return E1000_NOT_IMPLEMENTED;
+
+ low_thresh_delta = ((ets_cfg & NVM_ETS_LTHRES_DELTA_MASK) >>
+ NVM_ETS_LTHRES_DELTA_SHIFT);
+ num_sensors = (ets_cfg & NVM_ETS_NUM_SENSORS_MASK);
+
+ for (i = 1; i <= num_sensors; i++) {
+ e1000_read_nvm(hw, (ets_offset + i), 1, &ets_sensor);
+ sensor_index = ((ets_sensor & NVM_ETS_DATA_INDEX_MASK) >>
+ NVM_ETS_DATA_INDEX_SHIFT);
+ sensor_location = ((ets_sensor & NVM_ETS_DATA_LOC_MASK) >>
+ NVM_ETS_DATA_LOC_SHIFT);
+ therm_limit = ets_sensor & NVM_ETS_DATA_HTHRESH_MASK;
+
+ hw->phy.ops.write_i2c_byte(hw,
+ e1000_emc_therm_limit[sensor_index],
+ E1000_I2C_THERMAL_SENSOR_ADDR,
+ therm_limit);
+
+ if ((i < E1000_MAX_SENSORS) && (sensor_location != 0)) {
+ data->sensor[i].location = sensor_location;
+ data->sensor[i].caution_thresh = therm_limit;
+ data->sensor[i].max_op_thresh = therm_limit -
+ low_thresh_delta;
+ }
+ }
+ return status;
+}
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.h
new file mode 100644
index 00000000..1aec75ab
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.h
@@ -0,0 +1,509 @@
+/*******************************************************************************
+
+ Intel(R) Gigabit Ethernet Linux driver
+ Copyright(c) 2007-2013 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _E1000_82575_H_
+#define _E1000_82575_H_
+
+#define ID_LED_DEFAULT_82575_SERDES ((ID_LED_DEF1_DEF2 << 12) | \
+ (ID_LED_DEF1_DEF2 << 8) | \
+ (ID_LED_DEF1_DEF2 << 4) | \
+ (ID_LED_OFF1_ON2))
+/*
+ * Receive Address Register Count
+ * Number of high/low register pairs in the RAR. The RAR (Receive Address
+ * Registers) holds the directed and multicast addresses that we monitor.
+ * These entries are also used for MAC-based filtering.
+ */
+/*
+ * For 82576, there are an additional set of RARs that begin at an offset
+ * separate from the first set of RARs.
+ */
+#define E1000_RAR_ENTRIES_82575 16
+#define E1000_RAR_ENTRIES_82576 24
+#define E1000_RAR_ENTRIES_82580 24
+#define E1000_RAR_ENTRIES_I350 32
+#define E1000_SW_SYNCH_MB 0x00000100
+#define E1000_STAT_DEV_RST_SET 0x00100000
+#define E1000_CTRL_DEV_RST 0x20000000
+
+struct e1000_adv_data_desc {
+ __le64 buffer_addr; /* Address of the descriptor's data buffer */
+ union {
+ u32 data;
+ struct {
+ u32 datalen:16; /* Data buffer length */
+ u32 rsvd:4;
+ u32 dtyp:4; /* Descriptor type */
+ u32 dcmd:8; /* Descriptor command */
+ } config;
+ } lower;
+ union {
+ u32 data;
+ struct {
+ u32 status:4; /* Descriptor status */
+ u32 idx:4;
+ u32 popts:6; /* Packet Options */
+ u32 paylen:18; /* Payload length */
+ } options;
+ } upper;
+};
+
+#define E1000_TXD_DTYP_ADV_C 0x2 /* Advanced Context Descriptor */
+#define E1000_TXD_DTYP_ADV_D 0x3 /* Advanced Data Descriptor */
+#define E1000_ADV_TXD_CMD_DEXT 0x20 /* Descriptor extension (0 = legacy) */
+#define E1000_ADV_TUCMD_IPV4 0x2 /* IP Packet Type: 1=IPv4 */
+#define E1000_ADV_TUCMD_IPV6 0x0 /* IP Packet Type: 0=IPv6 */
+#define E1000_ADV_TUCMD_L4T_UDP 0x0 /* L4 Packet TYPE of UDP */
+#define E1000_ADV_TUCMD_L4T_TCP 0x4 /* L4 Packet TYPE of TCP */
+#define E1000_ADV_TUCMD_MKRREQ 0x10 /* Indicates markers are required */
+#define E1000_ADV_DCMD_EOP 0x1 /* End of Packet */
+#define E1000_ADV_DCMD_IFCS 0x2 /* Insert FCS (Ethernet CRC) */
+#define E1000_ADV_DCMD_RS 0x8 /* Report Status */
+#define E1000_ADV_DCMD_VLE 0x40 /* Add VLAN tag */
+#define E1000_ADV_DCMD_TSE 0x80 /* TCP Seg enable */
+/* Extended Device Control */
+#define E1000_CTRL_EXT_NSICR 0x00000001 /* Disable Intr Clear all on read */
+
+struct e1000_adv_context_desc {
+ union {
+ u32 ip_config;
+ struct {
+ u32 iplen:9;
+ u32 maclen:7;
+ u32 vlan_tag:16;
+ } fields;
+ } ip_setup;
+ u32 seq_num;
+ union {
+ u64 l4_config;
+ struct {
+ u32 mkrloc:9;
+ u32 tucmd:11;
+ u32 dtyp:4;
+ u32 adv:8;
+ u32 rsvd:4;
+ u32 idx:4;
+ u32 l4len:8;
+ u32 mss:16;
+ } fields;
+ } l4_setup;
+};
+
+/* SRRCTL bit definitions */
+#define E1000_SRRCTL_BSIZEPKT_SHIFT 10 /* Shift _right_ */
+#define E1000_SRRCTL_BSIZEHDRSIZE_MASK 0x00000F00
+#define E1000_SRRCTL_BSIZEHDRSIZE_SHIFT 2 /* Shift _left_ */
+#define E1000_SRRCTL_DESCTYPE_LEGACY 0x00000000
+#define E1000_SRRCTL_DESCTYPE_ADV_ONEBUF 0x02000000
+#define E1000_SRRCTL_DESCTYPE_HDR_SPLIT 0x04000000
+#define E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS 0x0A000000
+#define E1000_SRRCTL_DESCTYPE_HDR_REPLICATION 0x06000000
+#define E1000_SRRCTL_DESCTYPE_HDR_REPLICATION_LARGE_PKT 0x08000000
+#define E1000_SRRCTL_DESCTYPE_MASK 0x0E000000
+#define E1000_SRRCTL_TIMESTAMP 0x40000000
+#define E1000_SRRCTL_DROP_EN 0x80000000
+
+#define E1000_SRRCTL_BSIZEPKT_MASK 0x0000007F
+#define E1000_SRRCTL_BSIZEHDR_MASK 0x00003F00
+
+#define E1000_TX_HEAD_WB_ENABLE 0x1
+#define E1000_TX_SEQNUM_WB_ENABLE 0x2
+
+#define E1000_MRQC_ENABLE_RSS_4Q 0x00000002
+#define E1000_MRQC_ENABLE_VMDQ 0x00000003
+#define E1000_MRQC_ENABLE_VMDQ_RSS_2Q 0x00000005
+#define E1000_MRQC_RSS_FIELD_IPV4_UDP 0x00400000
+#define E1000_MRQC_RSS_FIELD_IPV6_UDP 0x00800000
+#define E1000_MRQC_RSS_FIELD_IPV6_UDP_EX 0x01000000
+#define E1000_MRQC_ENABLE_RSS_8Q 0x00000002
+
+#define E1000_VMRCTL_MIRROR_PORT_SHIFT 8
+#define E1000_VMRCTL_MIRROR_DSTPORT_MASK (7 << \
+ E1000_VMRCTL_MIRROR_PORT_SHIFT)
+#define E1000_VMRCTL_POOL_MIRROR_ENABLE (1 << 0)
+#define E1000_VMRCTL_UPLINK_MIRROR_ENABLE (1 << 1)
+#define E1000_VMRCTL_DOWNLINK_MIRROR_ENABLE (1 << 2)
+
+#define E1000_EICR_TX_QUEUE ( \
+ E1000_EICR_TX_QUEUE0 | \
+ E1000_EICR_TX_QUEUE1 | \
+ E1000_EICR_TX_QUEUE2 | \
+ E1000_EICR_TX_QUEUE3)
+
+#define E1000_EICR_RX_QUEUE ( \
+ E1000_EICR_RX_QUEUE0 | \
+ E1000_EICR_RX_QUEUE1 | \
+ E1000_EICR_RX_QUEUE2 | \
+ E1000_EICR_RX_QUEUE3)
+
+#define E1000_EIMS_RX_QUEUE E1000_EICR_RX_QUEUE
+#define E1000_EIMS_TX_QUEUE E1000_EICR_TX_QUEUE
+
+#define EIMS_ENABLE_MASK ( \
+ E1000_EIMS_RX_QUEUE | \
+ E1000_EIMS_TX_QUEUE | \
+ E1000_EIMS_TCP_TIMER | \
+ E1000_EIMS_OTHER)
+
+/* Immediate Interrupt Rx (A.K.A. Low Latency Interrupt) */
+#define E1000_IMIR_PORT_IM_EN 0x00010000 /* TCP port enable */
+#define E1000_IMIR_PORT_BP 0x00020000 /* TCP port check bypass */
+#define E1000_IMIREXT_SIZE_BP 0x00001000 /* Packet size bypass */
+#define E1000_IMIREXT_CTRL_URG 0x00002000 /* Check URG bit in header */
+#define E1000_IMIREXT_CTRL_ACK 0x00004000 /* Check ACK bit in header */
+#define E1000_IMIREXT_CTRL_PSH 0x00008000 /* Check PSH bit in header */
+#define E1000_IMIREXT_CTRL_RST 0x00010000 /* Check RST bit in header */
+#define E1000_IMIREXT_CTRL_SYN 0x00020000 /* Check SYN bit in header */
+#define E1000_IMIREXT_CTRL_FIN 0x00040000 /* Check FIN bit in header */
+#define E1000_IMIREXT_CTRL_BP 0x00080000 /* Bypass check of ctrl bits */
+
+/* Receive Descriptor - Advanced */
+union e1000_adv_rx_desc {
+ struct {
+ __le64 pkt_addr; /* Packet buffer address */
+ __le64 hdr_addr; /* Header buffer address */
+ } read;
+ struct {
+ struct {
+ union {
+ __le32 data;
+ struct {
+ __le16 pkt_info; /*RSS type, Pkt type*/
+ /* Split Header, header buffer len */
+ __le16 hdr_info;
+ } hs_rss;
+ } lo_dword;
+ union {
+ __le32 rss; /* RSS Hash */
+ struct {
+ __le16 ip_id; /* IP id */
+ __le16 csum; /* Packet Checksum */
+ } csum_ip;
+ } hi_dword;
+ } lower;
+ struct {
+ __le32 status_error; /* ext status/error */
+ __le16 length; /* Packet length */
+ __le16 vlan; /* VLAN tag */
+ } upper;
+ } wb; /* writeback */
+};
+
+#define E1000_RXDADV_RSSTYPE_MASK 0x0000000F
+#define E1000_RXDADV_RSSTYPE_SHIFT 12
+#define E1000_RXDADV_HDRBUFLEN_MASK 0x7FE0
+#define E1000_RXDADV_HDRBUFLEN_SHIFT 5
+#define E1000_RXDADV_SPLITHEADER_EN 0x00001000
+#define E1000_RXDADV_SPH 0x8000
+#define E1000_RXDADV_STAT_TS 0x10000 /* Pkt was time stamped */
+#define E1000_RXDADV_STAT_TSIP 0x08000 /* timestamp in packet */
+#define E1000_RXDADV_ERR_HBO 0x00800000
+
+/* RSS Hash results */
+#define E1000_RXDADV_RSSTYPE_NONE 0x00000000
+#define E1000_RXDADV_RSSTYPE_IPV4_TCP 0x00000001
+#define E1000_RXDADV_RSSTYPE_IPV4 0x00000002
+#define E1000_RXDADV_RSSTYPE_IPV6_TCP 0x00000003
+#define E1000_RXDADV_RSSTYPE_IPV6_EX 0x00000004
+#define E1000_RXDADV_RSSTYPE_IPV6 0x00000005
+#define E1000_RXDADV_RSSTYPE_IPV6_TCP_EX 0x00000006
+#define E1000_RXDADV_RSSTYPE_IPV4_UDP 0x00000007
+#define E1000_RXDADV_RSSTYPE_IPV6_UDP 0x00000008
+#define E1000_RXDADV_RSSTYPE_IPV6_UDP_EX 0x00000009
+
+/* RSS Packet Types as indicated in the receive descriptor */
+#define E1000_RXDADV_PKTTYPE_NONE 0x00000000
+#define E1000_RXDADV_PKTTYPE_IPV4 0x00000010 /* IPV4 hdr present */
+#define E1000_RXDADV_PKTTYPE_IPV4_EX 0x00000020 /* IPV4 hdr + extensions */
+#define E1000_RXDADV_PKTTYPE_IPV6 0x00000040 /* IPV6 hdr present */
+#define E1000_RXDADV_PKTTYPE_IPV6_EX 0x00000080 /* IPV6 hdr + extensions */
+#define E1000_RXDADV_PKTTYPE_TCP 0x00000100 /* TCP hdr present */
+#define E1000_RXDADV_PKTTYPE_UDP 0x00000200 /* UDP hdr present */
+#define E1000_RXDADV_PKTTYPE_SCTP 0x00000400 /* SCTP hdr present */
+#define E1000_RXDADV_PKTTYPE_NFS 0x00000800 /* NFS hdr present */
+
+#define E1000_RXDADV_PKTTYPE_IPSEC_ESP 0x00001000 /* IPSec ESP */
+#define E1000_RXDADV_PKTTYPE_IPSEC_AH 0x00002000 /* IPSec AH */
+#define E1000_RXDADV_PKTTYPE_LINKSEC 0x00004000 /* LinkSec Encap */
+#define E1000_RXDADV_PKTTYPE_ETQF 0x00008000 /* PKTTYPE is ETQF index */
+#define E1000_RXDADV_PKTTYPE_ETQF_MASK 0x00000070 /* ETQF has 8 indices */
+#define E1000_RXDADV_PKTTYPE_ETQF_SHIFT 4 /* Right-shift 4 bits */
+
+/* LinkSec results */
+/* Security Processing bit Indication */
+#define E1000_RXDADV_LNKSEC_STATUS_SECP 0x00020000
+#define E1000_RXDADV_LNKSEC_ERROR_BIT_MASK 0x18000000
+#define E1000_RXDADV_LNKSEC_ERROR_NO_SA_MATCH 0x08000000
+#define E1000_RXDADV_LNKSEC_ERROR_REPLAY_ERROR 0x10000000
+#define E1000_RXDADV_LNKSEC_ERROR_BAD_SIG 0x18000000
+
+#define E1000_RXDADV_IPSEC_STATUS_SECP 0x00020000
+#define E1000_RXDADV_IPSEC_ERROR_BIT_MASK 0x18000000
+#define E1000_RXDADV_IPSEC_ERROR_INVALID_PROTOCOL 0x08000000
+#define E1000_RXDADV_IPSEC_ERROR_INVALID_LENGTH 0x10000000
+#define E1000_RXDADV_IPSEC_ERROR_AUTHENTICATION_FAILED 0x18000000
+
+/* Transmit Descriptor - Advanced */
+union e1000_adv_tx_desc {
+ struct {
+ __le64 buffer_addr; /* Address of descriptor's data buf */
+ __le32 cmd_type_len;
+ __le32 olinfo_status;
+ } read;
+ struct {
+ __le64 rsvd; /* Reserved */
+ __le32 nxtseq_seed;
+ __le32 status;
+ } wb;
+};
+
+/* Adv Transmit Descriptor Config Masks */
+#define E1000_ADVTXD_DTYP_CTXT 0x00200000 /* Advanced Context Descriptor */
+#define E1000_ADVTXD_DTYP_DATA 0x00300000 /* Advanced Data Descriptor */
+#define E1000_ADVTXD_DCMD_EOP 0x01000000 /* End of Packet */
+#define E1000_ADVTXD_DCMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */
+#define E1000_ADVTXD_DCMD_RS 0x08000000 /* Report Status */
+#define E1000_ADVTXD_DCMD_DDTYP_ISCSI 0x10000000 /* DDP hdr type or iSCSI */
+#define E1000_ADVTXD_DCMD_DEXT 0x20000000 /* Descriptor extension (1=Adv) */
+#define E1000_ADVTXD_DCMD_VLE 0x40000000 /* VLAN pkt enable */
+#define E1000_ADVTXD_DCMD_TSE 0x80000000 /* TCP Seg enable */
+#define E1000_ADVTXD_MAC_LINKSEC 0x00040000 /* Apply LinkSec on pkt */
+#define E1000_ADVTXD_MAC_TSTAMP 0x00080000 /* IEEE1588 Timestamp pkt */
+#define E1000_ADVTXD_STAT_SN_CRC 0x00000002 /* NXTSEQ/SEED prsnt in WB */
+#define E1000_ADVTXD_IDX_SHIFT 4 /* Adv desc Index shift */
+#define E1000_ADVTXD_POPTS_ISCO_1ST 0x00000000 /* 1st TSO of iSCSI PDU */
+#define E1000_ADVTXD_POPTS_ISCO_MDL 0x00000800 /* Middle TSO of iSCSI PDU */
+#define E1000_ADVTXD_POPTS_ISCO_LAST 0x00001000 /* Last TSO of iSCSI PDU */
+/* 1st & Last TSO-full iSCSI PDU*/
+#define E1000_ADVTXD_POPTS_ISCO_FULL 0x00001800
+#define E1000_ADVTXD_POPTS_IPSEC 0x00000400 /* IPSec offload request */
+#define E1000_ADVTXD_PAYLEN_SHIFT 14 /* Adv desc PAYLEN shift */
+
+/* Context descriptors */
+struct e1000_adv_tx_context_desc {
+ __le32 vlan_macip_lens;
+ __le32 seqnum_seed;
+ __le32 type_tucmd_mlhl;
+ __le32 mss_l4len_idx;
+};
+
+#define E1000_ADVTXD_MACLEN_SHIFT 9 /* Adv ctxt desc mac len shift */
+#define E1000_ADVTXD_VLAN_SHIFT 16 /* Adv ctxt vlan tag shift */
+#define E1000_ADVTXD_TUCMD_IPV4 0x00000400 /* IP Packet Type: 1=IPv4 */
+#define E1000_ADVTXD_TUCMD_IPV6 0x00000000 /* IP Packet Type: 0=IPv6 */
+#define E1000_ADVTXD_TUCMD_L4T_UDP 0x00000000 /* L4 Packet TYPE of UDP */
+#define E1000_ADVTXD_TUCMD_L4T_TCP 0x00000800 /* L4 Packet TYPE of TCP */
+#define E1000_ADVTXD_TUCMD_L4T_SCTP 0x00001000 /* L4 Packet TYPE of SCTP */
+#define E1000_ADVTXD_TUCMD_IPSEC_TYPE_ESP 0x00002000 /* IPSec Type ESP */
+/* IPSec Encrypt Enable for ESP */
+#define E1000_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN 0x00004000
+/* Req requires Markers and CRC */
+#define E1000_ADVTXD_TUCMD_MKRREQ 0x00002000
+#define E1000_ADVTXD_L4LEN_SHIFT 8 /* Adv ctxt L4LEN shift */
+#define E1000_ADVTXD_MSS_SHIFT 16 /* Adv ctxt MSS shift */
+/* Adv ctxt IPSec SA IDX mask */
+#define E1000_ADVTXD_IPSEC_SA_INDEX_MASK 0x000000FF
+/* Adv ctxt IPSec ESP len mask */
+#define E1000_ADVTXD_IPSEC_ESP_LEN_MASK 0x000000FF
+
+/* Additional Transmit Descriptor Control definitions */
+#define E1000_TXDCTL_QUEUE_ENABLE 0x02000000 /* Ena specific Tx Queue */
+#define E1000_TXDCTL_SWFLSH 0x04000000 /* Tx Desc. wbk flushing */
+/* Tx Queue Arbitration Priority 0=low, 1=high */
+#define E1000_TXDCTL_PRIORITY 0x08000000
+
+/* Additional Receive Descriptor Control definitions */
+#define E1000_RXDCTL_QUEUE_ENABLE 0x02000000 /* Ena specific Rx Queue */
+#define E1000_RXDCTL_SWFLSH 0x04000000 /* Rx Desc. wbk flushing */
+
+/* Direct Cache Access (DCA) definitions */
+#define E1000_DCA_CTRL_DCA_ENABLE 0x00000000 /* DCA Enable */
+#define E1000_DCA_CTRL_DCA_DISABLE 0x00000001 /* DCA Disable */
+
+#define E1000_DCA_CTRL_DCA_MODE_CB1 0x00 /* DCA Mode CB1 */
+#define E1000_DCA_CTRL_DCA_MODE_CB2 0x02 /* DCA Mode CB2 */
+
+#define E1000_DCA_RXCTRL_CPUID_MASK 0x0000001F /* Rx CPUID Mask */
+#define E1000_DCA_RXCTRL_DESC_DCA_EN (1 << 5) /* DCA Rx Desc enable */
+#define E1000_DCA_RXCTRL_HEAD_DCA_EN (1 << 6) /* DCA Rx Desc header ena */
+#define E1000_DCA_RXCTRL_DATA_DCA_EN (1 << 7) /* DCA Rx Desc payload ena */
+#define E1000_DCA_RXCTRL_DESC_RRO_EN (1 << 9) /* DCA Rx Desc Relax Order */
+
+#define E1000_DCA_TXCTRL_CPUID_MASK 0x0000001F /* Tx CPUID Mask */
+#define E1000_DCA_TXCTRL_DESC_DCA_EN (1 << 5) /* DCA Tx Desc enable */
+#define E1000_DCA_TXCTRL_DESC_RRO_EN (1 << 9) /* Tx rd Desc Relax Order */
+#define E1000_DCA_TXCTRL_TX_WB_RO_EN (1 << 11) /* Tx Desc writeback RO bit */
+#define E1000_DCA_TXCTRL_DATA_RRO_EN (1 << 13) /* Tx rd data Relax Order */
+
+#define E1000_DCA_TXCTRL_CPUID_MASK_82576 0xFF000000 /* Tx CPUID Mask */
+#define E1000_DCA_RXCTRL_CPUID_MASK_82576 0xFF000000 /* Rx CPUID Mask */
+#define E1000_DCA_TXCTRL_CPUID_SHIFT_82576 24 /* Tx CPUID */
+#define E1000_DCA_RXCTRL_CPUID_SHIFT_82576 24 /* Rx CPUID */
+
+/* Additional interrupt register bit definitions */
+#define E1000_ICR_LSECPNS 0x00000020 /* PN threshold - server */
+#define E1000_IMS_LSECPNS E1000_ICR_LSECPNS /* PN threshold - server */
+#define E1000_ICS_LSECPNS E1000_ICR_LSECPNS /* PN threshold - server */
+
+/* ETQF register bit definitions */
+#define E1000_ETQF_FILTER_ENABLE (1 << 26)
+#define E1000_ETQF_IMM_INT (1 << 29)
+#define E1000_ETQF_1588 (1 << 30)
+#define E1000_ETQF_QUEUE_ENABLE (1 << 31)
+/*
+ * ETQF filter list: one static filter per filter consumer. This is
+ * to avoid filter collisions later. Add new filters
+ * here!!
+ *
+ * Current filters:
+ * EAPOL 802.1x (0x888e): Filter 0
+ */
+#define E1000_ETQF_FILTER_EAPOL 0
+
+#define E1000_FTQF_VF_BP 0x00008000
+#define E1000_FTQF_1588_TIME_STAMP 0x08000000
+#define E1000_FTQF_MASK 0xF0000000
+#define E1000_FTQF_MASK_PROTO_BP 0x10000000
+#define E1000_FTQF_MASK_SOURCE_ADDR_BP 0x20000000
+#define E1000_FTQF_MASK_DEST_ADDR_BP 0x40000000
+#define E1000_FTQF_MASK_SOURCE_PORT_BP 0x80000000
+
+#define E1000_NVM_APME_82575 0x0400
+#define MAX_NUM_VFS 7
+
+#define E1000_DTXSWC_MAC_SPOOF_MASK 0x000000FF /* Per VF MAC spoof cntrl */
+#define E1000_DTXSWC_VLAN_SPOOF_MASK 0x0000FF00 /* Per VF VLAN spoof cntrl */
+#define E1000_DTXSWC_LLE_MASK 0x00FF0000 /* Per VF Local LB enables */
+#define E1000_DTXSWC_VLAN_SPOOF_SHIFT 8
+#define E1000_DTXSWC_LLE_SHIFT 16
+#define E1000_DTXSWC_VMDQ_LOOPBACK_EN (1 << 31) /* global VF LB enable */
+
+/* Easy defines for setting default pool, would normally be left a zero */
+#define E1000_VT_CTL_DEFAULT_POOL_SHIFT 7
+#define E1000_VT_CTL_DEFAULT_POOL_MASK (0x7 << E1000_VT_CTL_DEFAULT_POOL_SHIFT)
+
+/* Other useful VMD_CTL register defines */
+#define E1000_VT_CTL_IGNORE_MAC (1 << 28)
+#define E1000_VT_CTL_DISABLE_DEF_POOL (1 << 29)
+#define E1000_VT_CTL_VM_REPL_EN (1 << 30)
+
+/* Per VM Offload register setup */
+#define E1000_VMOLR_RLPML_MASK 0x00003FFF /* Long Packet Maximum Length mask */
+#define E1000_VMOLR_LPE 0x00010000 /* Accept Long packet */
+#define E1000_VMOLR_RSSE 0x00020000 /* Enable RSS */
+#define E1000_VMOLR_AUPE 0x01000000 /* Accept untagged packets */
+#define E1000_VMOLR_ROMPE 0x02000000 /* Accept overflow multicast */
+#define E1000_VMOLR_ROPE 0x04000000 /* Accept overflow unicast */
+#define E1000_VMOLR_BAM 0x08000000 /* Accept Broadcast packets */
+#define E1000_VMOLR_MPME 0x10000000 /* Multicast promiscuous mode */
+#define E1000_VMOLR_STRVLAN 0x40000000 /* Vlan stripping enable */
+#define E1000_VMOLR_STRCRC 0x80000000 /* CRC stripping enable */
+
+#define E1000_VMOLR_VPE 0x00800000 /* VLAN promiscuous enable */
+#define E1000_VMOLR_UPE 0x20000000 /* Unicast promisuous enable */
+#define E1000_DVMOLR_HIDVLAN 0x20000000 /* Vlan hiding enable */
+#define E1000_DVMOLR_STRVLAN 0x40000000 /* Vlan stripping enable */
+#define E1000_DVMOLR_STRCRC 0x80000000 /* CRC stripping enable */
+
+#define E1000_PBRWAC_WALPB 0x00000007 /* Wrap around event on LAN Rx PB */
+#define E1000_PBRWAC_PBE 0x00000008 /* Rx packet buffer empty */
+
+#define E1000_VLVF_ARRAY_SIZE 32
+#define E1000_VLVF_VLANID_MASK 0x00000FFF
+#define E1000_VLVF_POOLSEL_SHIFT 12
+#define E1000_VLVF_POOLSEL_MASK (0xFF << E1000_VLVF_POOLSEL_SHIFT)
+#define E1000_VLVF_LVLAN 0x00100000
+#define E1000_VLVF_VLANID_ENABLE 0x80000000
+
+#define E1000_VMVIR_VLANA_DEFAULT 0x40000000 /* Always use default VLAN */
+#define E1000_VMVIR_VLANA_NEVER 0x80000000 /* Never insert VLAN tag */
+
+#define E1000_VF_INIT_TIMEOUT 200 /* Number of retries to clear RSTI */
+
+#define E1000_IOVCTL 0x05BBC
+#define E1000_IOVCTL_REUSE_VFQ 0x00000001
+
+#define E1000_RPLOLR_STRVLAN 0x40000000
+#define E1000_RPLOLR_STRCRC 0x80000000
+
+#define E1000_TCTL_EXT_COLD 0x000FFC00
+#define E1000_TCTL_EXT_COLD_SHIFT 10
+
+#define E1000_DTXCTL_8023LL 0x0004
+#define E1000_DTXCTL_VLAN_ADDED 0x0008
+#define E1000_DTXCTL_OOS_ENABLE 0x0010
+#define E1000_DTXCTL_MDP_EN 0x0020
+#define E1000_DTXCTL_SPOOF_INT 0x0040
+
+#define E1000_EEPROM_PCS_AUTONEG_DISABLE_BIT (1 << 14)
+
+#define ALL_QUEUES 0xFFFF
+
+/* Rx packet buffer size defines */
+#define E1000_RXPBS_SIZE_MASK_82576 0x0000007F
+void e1000_vmdq_set_loopback_pf(struct e1000_hw *hw, bool enable);
+void e1000_vmdq_set_anti_spoofing_pf(struct e1000_hw *hw, bool enable, int pf);
+void e1000_vmdq_set_replication_pf(struct e1000_hw *hw, bool enable);
+s32 e1000_init_nvm_params_82575(struct e1000_hw *hw);
+
+u16 e1000_rxpbs_adjust_82580(u32 data);
+s32 e1000_read_emi_reg(struct e1000_hw *hw, u16 addr, u16 *data);
+s32 e1000_set_eee_i350(struct e1000_hw *);
+s32 e1000_set_eee_i354(struct e1000_hw *);
+s32 e1000_get_eee_status_i354(struct e1000_hw *, bool *);
+#define E1000_I2C_THERMAL_SENSOR_ADDR 0xF8
+#define E1000_EMC_INTERNAL_DATA 0x00
+#define E1000_EMC_INTERNAL_THERM_LIMIT 0x20
+#define E1000_EMC_DIODE1_DATA 0x01
+#define E1000_EMC_DIODE1_THERM_LIMIT 0x19
+#define E1000_EMC_DIODE2_DATA 0x23
+#define E1000_EMC_DIODE2_THERM_LIMIT 0x1A
+#define E1000_EMC_DIODE3_DATA 0x2A
+#define E1000_EMC_DIODE3_THERM_LIMIT 0x30
+
+s32 e1000_get_thermal_sensor_data_generic(struct e1000_hw *hw);
+s32 e1000_init_thermal_sensor_thresh_generic(struct e1000_hw *hw);
+
+/* I2C SDA and SCL timing parameters for standard mode */
+#define E1000_I2C_T_HD_STA 4
+#define E1000_I2C_T_LOW 5
+#define E1000_I2C_T_HIGH 4
+#define E1000_I2C_T_SU_STA 5
+#define E1000_I2C_T_HD_DATA 5
+#define E1000_I2C_T_SU_DATA 1
+#define E1000_I2C_T_RISE 1
+#define E1000_I2C_T_FALL 1
+#define E1000_I2C_T_SU_STO 4
+#define E1000_I2C_T_BUF 5
+
+s32 e1000_set_i2c_bb(struct e1000_hw *hw);
+s32 e1000_read_i2c_byte_generic(struct e1000_hw *hw, u8 byte_offset,
+ u8 dev_addr, u8 *data);
+s32 e1000_write_i2c_byte_generic(struct e1000_hw *hw, u8 byte_offset,
+ u8 dev_addr, u8 data);
+void e1000_i2c_bus_clear(struct e1000_hw *hw);
+#endif /* _E1000_82575_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.c
new file mode 100644
index 00000000..6095d3b4
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.c
@@ -0,0 +1,1159 @@
+/*******************************************************************************
+
+ Intel(R) Gigabit Ethernet Linux driver
+ Copyright(c) 2007-2013 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#include "e1000_api.h"
+
+/**
+ * e1000_init_mac_params - Initialize MAC function pointers
+ * @hw: pointer to the HW structure
+ *
+ * This function initializes the function pointers for the MAC
+ * set of functions. Called by drivers or by e1000_setup_init_funcs.
+ **/
+s32 e1000_init_mac_params(struct e1000_hw *hw)
+{
+ s32 ret_val = E1000_SUCCESS;
+
+ if (hw->mac.ops.init_params) {
+ ret_val = hw->mac.ops.init_params(hw);
+ if (ret_val) {
+ DEBUGOUT("MAC Initialization Error\n");
+ goto out;
+ }
+ } else {
+ DEBUGOUT("mac.init_mac_params was NULL\n");
+ ret_val = -E1000_ERR_CONFIG;
+ }
+
+out:
+ return ret_val;
+}
+
+/**
+ * e1000_init_nvm_params - Initialize NVM function pointers
+ * @hw: pointer to the HW structure
+ *
+ * This function initializes the function pointers for the NVM
+ * set of functions. Called by drivers or by e1000_setup_init_funcs.
+ **/
+s32 e1000_init_nvm_params(struct e1000_hw *hw)
+{
+ s32 ret_val = E1000_SUCCESS;
+
+ if (hw->nvm.ops.init_params) {
+ ret_val = hw->nvm.ops.init_params(hw);
+ if (ret_val) {
+ DEBUGOUT("NVM Initialization Error\n");
+ goto out;
+ }
+ } else {
+ DEBUGOUT("nvm.init_nvm_params was NULL\n");
+ ret_val = -E1000_ERR_CONFIG;
+ }
+
+out:
+ return ret_val;
+}
+
+/**
+ * e1000_init_phy_params - Initialize PHY function pointers
+ * @hw: pointer to the HW structure
+ *
+ * This function initializes the function pointers for the PHY
+ * set of functions. Called by drivers or by e1000_setup_init_funcs.
+ **/
+s32 e1000_init_phy_params(struct e1000_hw *hw)
+{
+ s32 ret_val = E1000_SUCCESS;
+
+ if (hw->phy.ops.init_params) {
+ ret_val = hw->phy.ops.init_params(hw);
+ if (ret_val) {
+ DEBUGOUT("PHY Initialization Error\n");
+ goto out;
+ }
+ } else {
+ DEBUGOUT("phy.init_phy_params was NULL\n");
+ ret_val = -E1000_ERR_CONFIG;
+ }
+
+out:
+ return ret_val;
+}
+
+/**
+ * e1000_init_mbx_params - Initialize mailbox function pointers
+ * @hw: pointer to the HW structure
+ *
+ * This function initializes the function pointers for the PHY
+ * set of functions. Called by drivers or by e1000_setup_init_funcs.
+ **/
+s32 e1000_init_mbx_params(struct e1000_hw *hw)
+{
+ s32 ret_val = E1000_SUCCESS;
+
+ if (hw->mbx.ops.init_params) {
+ ret_val = hw->mbx.ops.init_params(hw);
+ if (ret_val) {
+ DEBUGOUT("Mailbox Initialization Error\n");
+ goto out;
+ }
+ } else {
+ DEBUGOUT("mbx.init_mbx_params was NULL\n");
+ ret_val = -E1000_ERR_CONFIG;
+ }
+
+out:
+ return ret_val;
+}
+
+/**
+ * e1000_set_mac_type - Sets MAC type
+ * @hw: pointer to the HW structure
+ *
+ * This function sets the mac type of the adapter based on the
+ * device ID stored in the hw structure.
+ * MUST BE FIRST FUNCTION CALLED (explicitly or through
+ * e1000_setup_init_funcs()).
+ **/
+s32 e1000_set_mac_type(struct e1000_hw *hw)
+{
+ struct e1000_mac_info *mac = &hw->mac;
+ s32 ret_val = E1000_SUCCESS;
+
+ DEBUGFUNC("e1000_set_mac_type");
+
+ switch (hw->device_id) {
+ case E1000_DEV_ID_82575EB_COPPER:
+ case E1000_DEV_ID_82575EB_FIBER_SERDES:
+ case E1000_DEV_ID_82575GB_QUAD_COPPER:
+ mac->type = e1000_82575;
+ break;
+ case E1000_DEV_ID_82576:
+ case E1000_DEV_ID_82576_FIBER:
+ case E1000_DEV_ID_82576_SERDES:
+ case E1000_DEV_ID_82576_QUAD_COPPER:
+ case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
+ case E1000_DEV_ID_82576_NS:
+ case E1000_DEV_ID_82576_NS_SERDES:
+ case E1000_DEV_ID_82576_SERDES_QUAD:
+ mac->type = e1000_82576;
+ break;
+ case E1000_DEV_ID_82580_COPPER:
+ case E1000_DEV_ID_82580_FIBER:
+ case E1000_DEV_ID_82580_SERDES:
+ case E1000_DEV_ID_82580_SGMII:
+ case E1000_DEV_ID_82580_COPPER_DUAL:
+ case E1000_DEV_ID_82580_QUAD_FIBER:
+ case E1000_DEV_ID_DH89XXCC_SGMII:
+ case E1000_DEV_ID_DH89XXCC_SERDES:
+ case E1000_DEV_ID_DH89XXCC_BACKPLANE:
+ case E1000_DEV_ID_DH89XXCC_SFP:
+ mac->type = e1000_82580;
+ break;
+ case E1000_DEV_ID_I350_COPPER:
+ case E1000_DEV_ID_I350_FIBER:
+ case E1000_DEV_ID_I350_SERDES:
+ case E1000_DEV_ID_I350_SGMII:
+ case E1000_DEV_ID_I350_DA4:
+ mac->type = e1000_i350;
+ break;
+ case E1000_DEV_ID_I210_COPPER_FLASHLESS:
+ case E1000_DEV_ID_I210_SERDES_FLASHLESS:
+ case E1000_DEV_ID_I210_COPPER:
+ case E1000_DEV_ID_I210_COPPER_OEM1:
+ case E1000_DEV_ID_I210_COPPER_IT:
+ case E1000_DEV_ID_I210_FIBER:
+ case E1000_DEV_ID_I210_SERDES:
+ case E1000_DEV_ID_I210_SGMII:
+ mac->type = e1000_i210;
+ break;
+ case E1000_DEV_ID_I211_COPPER:
+ mac->type = e1000_i211;
+ break;
+
+ case E1000_DEV_ID_I354_BACKPLANE_1GBPS:
+ case E1000_DEV_ID_I354_SGMII:
+ case E1000_DEV_ID_I354_BACKPLANE_2_5GBPS:
+ mac->type = e1000_i354;
+ break;
+ default:
+ /* Should never have loaded on this device */
+ ret_val = -E1000_ERR_MAC_INIT;
+ break;
+ }
+
+ return ret_val;
+}
+
+/**
+ * e1000_setup_init_funcs - Initializes function pointers
+ * @hw: pointer to the HW structure
+ * @init_device: true will initialize the rest of the function pointers
+ * getting the device ready for use. false will only set
+ * MAC type and the function pointers for the other init
+ * functions. Passing false will not generate any hardware
+ * reads or writes.
+ *
+ * This function must be called by a driver in order to use the rest
+ * of the 'shared' code files. Called by drivers only.
+ **/
+s32 e1000_setup_init_funcs(struct e1000_hw *hw, bool init_device)
+{
+ s32 ret_val;
+
+ /* Can't do much good without knowing the MAC type. */
+ ret_val = e1000_set_mac_type(hw);
+ if (ret_val) {
+ DEBUGOUT("ERROR: MAC type could not be set properly.\n");
+ goto out;
+ }
+
+ if (!hw->hw_addr) {
+ DEBUGOUT("ERROR: Registers not mapped\n");
+ ret_val = -E1000_ERR_CONFIG;
+ goto out;
+ }
+
+ /*
+ * Init function pointers to generic implementations. We do this first
+ * allowing a driver module to override it afterward.
+ */
+ e1000_init_mac_ops_generic(hw);
+ e1000_init_phy_ops_generic(hw);
+ e1000_init_nvm_ops_generic(hw);
+ e1000_init_mbx_ops_generic(hw);
+
+ /*
+ * Set up the init function pointers. These are functions within the
+ * adapter family file that sets up function pointers for the rest of
+ * the functions in that family.
+ */
+ switch (hw->mac.type) {
+ case e1000_82575:
+ case e1000_82576:
+ case e1000_82580:
+ case e1000_i350:
+ case e1000_i354:
+ e1000_init_function_pointers_82575(hw);
+ break;
+ case e1000_i210:
+ case e1000_i211:
+ e1000_init_function_pointers_i210(hw);
+ break;
+ default:
+ DEBUGOUT("Hardware not supported\n");
+ ret_val = -E1000_ERR_CONFIG;
+ break;
+ }
+
+ /*
+ * Initialize the rest of the function pointers. These require some
+ * register reads/writes in some cases.
+ */
+ if (!(ret_val) && init_device) {
+ ret_val = e1000_init_mac_params(hw);
+ if (ret_val)
+ goto out;
+
+ ret_val = e1000_init_nvm_params(hw);
+ if (ret_val)
+ goto out;
+
+ ret_val = e1000_init_phy_params(hw);
+ if (ret_val)
+ goto out;
+
+ ret_val = e1000_init_mbx_params(hw);
+ if (ret_val)
+ goto out;
+ }
+
+out:
+ return ret_val;
+}
+
+/**
+ * e1000_get_bus_info - Obtain bus information for adapter
+ * @hw: pointer to the HW structure
+ *
+ * This will obtain information about the HW bus for which the
+ * adapter is attached and stores it in the hw structure. This is a
+ * function pointer entry point called by drivers.
+ **/
+s32 e1000_get_bus_info(struct e1000_hw *hw)
+{
+ if (hw->mac.ops.get_bus_info)
+ return hw->mac.ops.get_bus_info(hw);
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_clear_vfta - Clear VLAN filter table
+ * @hw: pointer to the HW structure
+ *
+ * This clears the VLAN filter table on the adapter. This is a function
+ * pointer entry point called by drivers.
+ **/
+void e1000_clear_vfta(struct e1000_hw *hw)
+{
+ if (hw->mac.ops.clear_vfta)
+ hw->mac.ops.clear_vfta(hw);
+}
+
+/**
+ * e1000_write_vfta - Write value to VLAN filter table
+ * @hw: pointer to the HW structure
+ * @offset: the 32-bit offset in which to write the value to.
+ * @value: the 32-bit value to write at location offset.
+ *
+ * This writes a 32-bit value to a 32-bit offset in the VLAN filter
+ * table. This is a function pointer entry point called by drivers.
+ **/
+void e1000_write_vfta(struct e1000_hw *hw, u32 offset, u32 value)
+{
+ if (hw->mac.ops.write_vfta)
+ hw->mac.ops.write_vfta(hw, offset, value);
+}
+
+/**
+ * e1000_update_mc_addr_list - Update Multicast addresses
+ * @hw: pointer to the HW structure
+ * @mc_addr_list: array of multicast addresses to program
+ * @mc_addr_count: number of multicast addresses to program
+ *
+ * Updates the Multicast Table Array.
+ * The caller must have a packed mc_addr_list of multicast addresses.
+ **/
+void e1000_update_mc_addr_list(struct e1000_hw *hw, u8 *mc_addr_list,
+ u32 mc_addr_count)
+{
+ if (hw->mac.ops.update_mc_addr_list)
+ hw->mac.ops.update_mc_addr_list(hw, mc_addr_list,
+ mc_addr_count);
+}
+
+/**
+ * e1000_force_mac_fc - Force MAC flow control
+ * @hw: pointer to the HW structure
+ *
+ * Force the MAC's flow control settings. Currently no func pointer exists
+ * and all implementations are handled in the generic version of this
+ * function.
+ **/
+s32 e1000_force_mac_fc(struct e1000_hw *hw)
+{
+ return e1000_force_mac_fc_generic(hw);
+}
+
+/**
+ * e1000_check_for_link - Check/Store link connection
+ * @hw: pointer to the HW structure
+ *
+ * This checks the link condition of the adapter and stores the
+ * results in the hw->mac structure. This is a function pointer entry
+ * point called by drivers.
+ **/
+s32 e1000_check_for_link(struct e1000_hw *hw)
+{
+ if (hw->mac.ops.check_for_link)
+ return hw->mac.ops.check_for_link(hw);
+
+ return -E1000_ERR_CONFIG;
+}
+
+/**
+ * e1000_check_mng_mode - Check management mode
+ * @hw: pointer to the HW structure
+ *
+ * This checks if the adapter has manageability enabled.
+ * This is a function pointer entry point called by drivers.
+ **/
+bool e1000_check_mng_mode(struct e1000_hw *hw)
+{
+ if (hw->mac.ops.check_mng_mode)
+ return hw->mac.ops.check_mng_mode(hw);
+
+ return false;
+}
+
+/**
+ * e1000_mng_write_dhcp_info - Writes DHCP info to host interface
+ * @hw: pointer to the HW structure
+ * @buffer: pointer to the host interface
+ * @length: size of the buffer
+ *
+ * Writes the DHCP information to the host interface.
+ **/
+s32 e1000_mng_write_dhcp_info(struct e1000_hw *hw, u8 *buffer, u16 length)
+{
+ return e1000_mng_write_dhcp_info_generic(hw, buffer, length);
+}
+
+/**
+ * e1000_reset_hw - Reset hardware
+ * @hw: pointer to the HW structure
+ *
+ * This resets the hardware into a known state. This is a function pointer
+ * entry point called by drivers.
+ **/
+s32 e1000_reset_hw(struct e1000_hw *hw)
+{
+ if (hw->mac.ops.reset_hw)
+ return hw->mac.ops.reset_hw(hw);
+
+ return -E1000_ERR_CONFIG;
+}
+
+/**
+ * e1000_init_hw - Initialize hardware
+ * @hw: pointer to the HW structure
+ *
+ * This inits the hardware readying it for operation. This is a function
+ * pointer entry point called by drivers.
+ **/
+s32 e1000_init_hw(struct e1000_hw *hw)
+{
+ if (hw->mac.ops.init_hw)
+ return hw->mac.ops.init_hw(hw);
+
+ return -E1000_ERR_CONFIG;
+}
+
+/**
+ * e1000_setup_link - Configures link and flow control
+ * @hw: pointer to the HW structure
+ *
+ * This configures link and flow control settings for the adapter. This
+ * is a function pointer entry point called by drivers. While modules can
+ * also call this, they probably call their own version of this function.
+ **/
+s32 e1000_setup_link(struct e1000_hw *hw)
+{
+ if (hw->mac.ops.setup_link)
+ return hw->mac.ops.setup_link(hw);
+
+ return -E1000_ERR_CONFIG;
+}
+
+/**
+ * e1000_get_speed_and_duplex - Returns current speed and duplex
+ * @hw: pointer to the HW structure
+ * @speed: pointer to a 16-bit value to store the speed
+ * @duplex: pointer to a 16-bit value to store the duplex.
+ *
+ * This returns the speed and duplex of the adapter in the two 'out'
+ * variables passed in. This is a function pointer entry point called
+ * by drivers.
+ **/
+s32 e1000_get_speed_and_duplex(struct e1000_hw *hw, u16 *speed, u16 *duplex)
+{
+ if (hw->mac.ops.get_link_up_info)
+ return hw->mac.ops.get_link_up_info(hw, speed, duplex);
+
+ return -E1000_ERR_CONFIG;
+}
+
+/**
+ * e1000_setup_led - Configures SW controllable LED
+ * @hw: pointer to the HW structure
+ *
+ * This prepares the SW controllable LED for use and saves the current state
+ * of the LED so it can be later restored. This is a function pointer entry
+ * point called by drivers.
+ **/
+s32 e1000_setup_led(struct e1000_hw *hw)
+{
+ if (hw->mac.ops.setup_led)
+ return hw->mac.ops.setup_led(hw);
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_cleanup_led - Restores SW controllable LED
+ * @hw: pointer to the HW structure
+ *
+ * This restores the SW controllable LED to the value saved off by
+ * e1000_setup_led. This is a function pointer entry point called by drivers.
+ **/
+s32 e1000_cleanup_led(struct e1000_hw *hw)
+{
+ if (hw->mac.ops.cleanup_led)
+ return hw->mac.ops.cleanup_led(hw);
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_blink_led - Blink SW controllable LED
+ * @hw: pointer to the HW structure
+ *
+ * This starts the adapter LED blinking. Request the LED to be setup first
+ * and cleaned up after. This is a function pointer entry point called by
+ * drivers.
+ **/
+s32 e1000_blink_led(struct e1000_hw *hw)
+{
+ if (hw->mac.ops.blink_led)
+ return hw->mac.ops.blink_led(hw);
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_id_led_init - store LED configurations in SW
+ * @hw: pointer to the HW structure
+ *
+ * Initializes the LED config in SW. This is a function pointer entry point
+ * called by drivers.
+ **/
+s32 e1000_id_led_init(struct e1000_hw *hw)
+{
+ if (hw->mac.ops.id_led_init)
+ return hw->mac.ops.id_led_init(hw);
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_led_on - Turn on SW controllable LED
+ * @hw: pointer to the HW structure
+ *
+ * Turns the SW defined LED on. This is a function pointer entry point
+ * called by drivers.
+ **/
+s32 e1000_led_on(struct e1000_hw *hw)
+{
+ if (hw->mac.ops.led_on)
+ return hw->mac.ops.led_on(hw);
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_led_off - Turn off SW controllable LED
+ * @hw: pointer to the HW structure
+ *
+ * Turns the SW defined LED off. This is a function pointer entry point
+ * called by drivers.
+ **/
+s32 e1000_led_off(struct e1000_hw *hw)
+{
+ if (hw->mac.ops.led_off)
+ return hw->mac.ops.led_off(hw);
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_reset_adaptive - Reset adaptive IFS
+ * @hw: pointer to the HW structure
+ *
+ * Resets the adaptive IFS. Currently no func pointer exists and all
+ * implementations are handled in the generic version of this function.
+ **/
+void e1000_reset_adaptive(struct e1000_hw *hw)
+{
+ e1000_reset_adaptive_generic(hw);
+}
+
+/**
+ * e1000_update_adaptive - Update adaptive IFS
+ * @hw: pointer to the HW structure
+ *
+ * Updates adapter IFS. Currently no func pointer exists and all
+ * implementations are handled in the generic version of this function.
+ **/
+void e1000_update_adaptive(struct e1000_hw *hw)
+{
+ e1000_update_adaptive_generic(hw);
+}
+
+/**
+ * e1000_disable_pcie_master - Disable PCI-Express master access
+ * @hw: pointer to the HW structure
+ *
+ * Disables PCI-Express master access and verifies there are no pending
+ * requests. Currently no func pointer exists and all implementations are
+ * handled in the generic version of this function.
+ **/
+s32 e1000_disable_pcie_master(struct e1000_hw *hw)
+{
+ return e1000_disable_pcie_master_generic(hw);
+}
+
+/**
+ * e1000_config_collision_dist - Configure collision distance
+ * @hw: pointer to the HW structure
+ *
+ * Configures the collision distance to the default value and is used
+ * during link setup.
+ **/
+void e1000_config_collision_dist(struct e1000_hw *hw)
+{
+ if (hw->mac.ops.config_collision_dist)
+ hw->mac.ops.config_collision_dist(hw);
+}
+
+/**
+ * e1000_rar_set - Sets a receive address register
+ * @hw: pointer to the HW structure
+ * @addr: address to set the RAR to
+ * @index: the RAR to set
+ *
+ * Sets a Receive Address Register (RAR) to the specified address.
+ **/
+void e1000_rar_set(struct e1000_hw *hw, u8 *addr, u32 index)
+{
+ if (hw->mac.ops.rar_set)
+ hw->mac.ops.rar_set(hw, addr, index);
+}
+
+/**
+ * e1000_validate_mdi_setting - Ensures valid MDI/MDIX SW state
+ * @hw: pointer to the HW structure
+ *
+ * Ensures that the MDI/MDIX SW state is valid.
+ **/
+s32 e1000_validate_mdi_setting(struct e1000_hw *hw)
+{
+ if (hw->mac.ops.validate_mdi_setting)
+ return hw->mac.ops.validate_mdi_setting(hw);
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_hash_mc_addr - Determines address location in multicast table
+ * @hw: pointer to the HW structure
+ * @mc_addr: Multicast address to hash.
+ *
+ * This hashes an address to determine its location in the multicast
+ * table. Currently no func pointer exists and all implementations
+ * are handled in the generic version of this function.
+ **/
+u32 e1000_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr)
+{
+ return e1000_hash_mc_addr_generic(hw, mc_addr);
+}
+
+/**
+ * e1000_enable_tx_pkt_filtering - Enable packet filtering on TX
+ * @hw: pointer to the HW structure
+ *
+ * Enables packet filtering on transmit packets if manageability is enabled
+ * and host interface is enabled.
+ * Currently no func pointer exists and all implementations are handled in the
+ * generic version of this function.
+ **/
+bool e1000_enable_tx_pkt_filtering(struct e1000_hw *hw)
+{
+ return e1000_enable_tx_pkt_filtering_generic(hw);
+}
+
+/**
+ * e1000_mng_host_if_write - Writes to the manageability host interface
+ * @hw: pointer to the HW structure
+ * @buffer: pointer to the host interface buffer
+ * @length: size of the buffer
+ * @offset: location in the buffer to write to
+ * @sum: sum of the data (not checksum)
+ *
+ * This function writes the buffer content at the offset given on the host if.
+ * It also does alignment considerations to do the writes in most efficient
+ * way. Also fills up the sum of the buffer in *buffer parameter.
+ **/
+s32 e1000_mng_host_if_write(struct e1000_hw *hw, u8 *buffer, u16 length,
+ u16 offset, u8 *sum)
+{
+ return e1000_mng_host_if_write_generic(hw, buffer, length, offset, sum);
+}
+
+/**
+ * e1000_mng_write_cmd_header - Writes manageability command header
+ * @hw: pointer to the HW structure
+ * @hdr: pointer to the host interface command header
+ *
+ * Writes the command header after does the checksum calculation.
+ **/
+s32 e1000_mng_write_cmd_header(struct e1000_hw *hw,
+ struct e1000_host_mng_command_header *hdr)
+{
+ return e1000_mng_write_cmd_header_generic(hw, hdr);
+}
+
+/**
+ * e1000_mng_enable_host_if - Checks host interface is enabled
+ * @hw: pointer to the HW structure
+ *
+ * Returns E1000_success upon success, else E1000_ERR_HOST_INTERFACE_COMMAND
+ *
+ * This function checks whether the HOST IF is enabled for command operation
+ * and also checks whether the previous command is completed. It busy waits
+ * in case of previous command is not completed.
+ **/
+s32 e1000_mng_enable_host_if(struct e1000_hw *hw)
+{
+ return e1000_mng_enable_host_if_generic(hw);
+}
+
+/**
+ * e1000_check_reset_block - Verifies PHY can be reset
+ * @hw: pointer to the HW structure
+ *
+ * Checks if the PHY is in a state that can be reset or if manageability
+ * has it tied up. This is a function pointer entry point called by drivers.
+ **/
+s32 e1000_check_reset_block(struct e1000_hw *hw)
+{
+ if (hw->phy.ops.check_reset_block)
+ return hw->phy.ops.check_reset_block(hw);
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_read_phy_reg - Reads PHY register
+ * @hw: pointer to the HW structure
+ * @offset: the register to read
+ * @data: the buffer to store the 16-bit read.
+ *
+ * Reads the PHY register and returns the value in data.
+ * This is a function pointer entry point called by drivers.
+ **/
+s32 e1000_read_phy_reg(struct e1000_hw *hw, u32 offset, u16 *data)
+{
+ if (hw->phy.ops.read_reg)
+ return hw->phy.ops.read_reg(hw, offset, data);
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_write_phy_reg - Writes PHY register
+ * @hw: pointer to the HW structure
+ * @offset: the register to write
+ * @data: the value to write.
+ *
+ * Writes the PHY register at offset with the value in data.
+ * This is a function pointer entry point called by drivers.
+ **/
+s32 e1000_write_phy_reg(struct e1000_hw *hw, u32 offset, u16 data)
+{
+ if (hw->phy.ops.write_reg)
+ return hw->phy.ops.write_reg(hw, offset, data);
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_release_phy - Generic release PHY
+ * @hw: pointer to the HW structure
+ *
+ * Return if silicon family does not require a semaphore when accessing the
+ * PHY.
+ **/
+void e1000_release_phy(struct e1000_hw *hw)
+{
+ if (hw->phy.ops.release)
+ hw->phy.ops.release(hw);
+}
+
+/**
+ * e1000_acquire_phy - Generic acquire PHY
+ * @hw: pointer to the HW structure
+ *
+ * Return success if silicon family does not require a semaphore when
+ * accessing the PHY.
+ **/
+s32 e1000_acquire_phy(struct e1000_hw *hw)
+{
+ if (hw->phy.ops.acquire)
+ return hw->phy.ops.acquire(hw);
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_read_kmrn_reg - Reads register using Kumeran interface
+ * @hw: pointer to the HW structure
+ * @offset: the register to read
+ * @data: the location to store the 16-bit value read.
+ *
+ * Reads a register out of the Kumeran interface. Currently no func pointer
+ * exists and all implementations are handled in the generic version of
+ * this function.
+ **/
+s32 e1000_read_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 *data)
+{
+ return e1000_read_kmrn_reg_generic(hw, offset, data);
+}
+
+/**
+ * e1000_write_kmrn_reg - Writes register using Kumeran interface
+ * @hw: pointer to the HW structure
+ * @offset: the register to write
+ * @data: the value to write.
+ *
+ * Writes a register to the Kumeran interface. Currently no func pointer
+ * exists and all implementations are handled in the generic version of
+ * this function.
+ **/
+s32 e1000_write_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 data)
+{
+ return e1000_write_kmrn_reg_generic(hw, offset, data);
+}
+
+/**
+ * e1000_get_cable_length - Retrieves cable length estimation
+ * @hw: pointer to the HW structure
+ *
+ * This function estimates the cable length and stores them in
+ * hw->phy.min_length and hw->phy.max_length. This is a function pointer
+ * entry point called by drivers.
+ **/
+s32 e1000_get_cable_length(struct e1000_hw *hw)
+{
+ if (hw->phy.ops.get_cable_length)
+ return hw->phy.ops.get_cable_length(hw);
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_get_phy_info - Retrieves PHY information from registers
+ * @hw: pointer to the HW structure
+ *
+ * This function gets some information from various PHY registers and
+ * populates hw->phy values with it. This is a function pointer entry
+ * point called by drivers.
+ **/
+s32 e1000_get_phy_info(struct e1000_hw *hw)
+{
+ if (hw->phy.ops.get_info)
+ return hw->phy.ops.get_info(hw);
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_phy_hw_reset - Hard PHY reset
+ * @hw: pointer to the HW structure
+ *
+ * Performs a hard PHY reset. This is a function pointer entry point called
+ * by drivers.
+ **/
+s32 e1000_phy_hw_reset(struct e1000_hw *hw)
+{
+ if (hw->phy.ops.reset)
+ return hw->phy.ops.reset(hw);
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_phy_commit - Soft PHY reset
+ * @hw: pointer to the HW structure
+ *
+ * Performs a soft PHY reset on those that apply. This is a function pointer
+ * entry point called by drivers.
+ **/
+s32 e1000_phy_commit(struct e1000_hw *hw)
+{
+ if (hw->phy.ops.commit)
+ return hw->phy.ops.commit(hw);
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_set_d0_lplu_state - Sets low power link up state for D0
+ * @hw: pointer to the HW structure
+ * @active: boolean used to enable/disable lplu
+ *
+ * Success returns 0, Failure returns 1
+ *
+ * The low power link up (lplu) state is set to the power management level D0
+ * and SmartSpeed is disabled when active is true, else clear lplu for D0
+ * and enable Smartspeed. LPLU and Smartspeed are mutually exclusive. LPLU
+ * is used during Dx states where the power conservation is most important.
+ * During driver activity, SmartSpeed should be enabled so performance is
+ * maintained. This is a function pointer entry point called by drivers.
+ **/
+s32 e1000_set_d0_lplu_state(struct e1000_hw *hw, bool active)
+{
+ if (hw->phy.ops.set_d0_lplu_state)
+ return hw->phy.ops.set_d0_lplu_state(hw, active);
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_set_d3_lplu_state - Sets low power link up state for D3
+ * @hw: pointer to the HW structure
+ * @active: boolean used to enable/disable lplu
+ *
+ * Success returns 0, Failure returns 1
+ *
+ * The low power link up (lplu) state is set to the power management level D3
+ * and SmartSpeed is disabled when active is true, else clear lplu for D3
+ * and enable Smartspeed. LPLU and Smartspeed are mutually exclusive. LPLU
+ * is used during Dx states where the power conservation is most important.
+ * During driver activity, SmartSpeed should be enabled so performance is
+ * maintained. This is a function pointer entry point called by drivers.
+ **/
+s32 e1000_set_d3_lplu_state(struct e1000_hw *hw, bool active)
+{
+ if (hw->phy.ops.set_d3_lplu_state)
+ return hw->phy.ops.set_d3_lplu_state(hw, active);
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_read_mac_addr - Reads MAC address
+ * @hw: pointer to the HW structure
+ *
+ * Reads the MAC address out of the adapter and stores it in the HW structure.
+ * Currently no func pointer exists and all implementations are handled in the
+ * generic version of this function.
+ **/
+s32 e1000_read_mac_addr(struct e1000_hw *hw)
+{
+ if (hw->mac.ops.read_mac_addr)
+ return hw->mac.ops.read_mac_addr(hw);
+
+ return e1000_read_mac_addr_generic(hw);
+}
+
+/**
+ * e1000_read_pba_string - Read device part number string
+ * @hw: pointer to the HW structure
+ * @pba_num: pointer to device part number
+ * @pba_num_size: size of part number buffer
+ *
+ * Reads the product board assembly (PBA) number from the EEPROM and stores
+ * the value in pba_num.
+ * Currently no func pointer exists and all implementations are handled in the
+ * generic version of this function.
+ **/
+s32 e1000_read_pba_string(struct e1000_hw *hw, u8 *pba_num, u32 pba_num_size)
+{
+ return e1000_read_pba_string_generic(hw, pba_num, pba_num_size);
+}
+
+/**
+ * e1000_read_pba_length - Read device part number string length
+ * @hw: pointer to the HW structure
+ * @pba_num_size: size of part number buffer
+ *
+ * Reads the product board assembly (PBA) number length from the EEPROM and
+ * stores the value in pba_num.
+ * Currently no func pointer exists and all implementations are handled in the
+ * generic version of this function.
+ **/
+s32 e1000_read_pba_length(struct e1000_hw *hw, u32 *pba_num_size)
+{
+ return e1000_read_pba_length_generic(hw, pba_num_size);
+}
+
+/**
+ * e1000_validate_nvm_checksum - Verifies NVM (EEPROM) checksum
+ * @hw: pointer to the HW structure
+ *
+ * Validates the NVM checksum is correct. This is a function pointer entry
+ * point called by drivers.
+ **/
+s32 e1000_validate_nvm_checksum(struct e1000_hw *hw)
+{
+ if (hw->nvm.ops.validate)
+ return hw->nvm.ops.validate(hw);
+
+ return -E1000_ERR_CONFIG;
+}
+
+/**
+ * e1000_update_nvm_checksum - Updates NVM (EEPROM) checksum
+ * @hw: pointer to the HW structure
+ *
+ * Updates the NVM checksum. Currently no func pointer exists and all
+ * implementations are handled in the generic version of this function.
+ **/
+s32 e1000_update_nvm_checksum(struct e1000_hw *hw)
+{
+ if (hw->nvm.ops.update)
+ return hw->nvm.ops.update(hw);
+
+ return -E1000_ERR_CONFIG;
+}
+
+/**
+ * e1000_reload_nvm - Reloads EEPROM
+ * @hw: pointer to the HW structure
+ *
+ * Reloads the EEPROM by setting the "Reinitialize from EEPROM" bit in the
+ * extended control register.
+ **/
+void e1000_reload_nvm(struct e1000_hw *hw)
+{
+ if (hw->nvm.ops.reload)
+ hw->nvm.ops.reload(hw);
+}
+
+/**
+ * e1000_read_nvm - Reads NVM (EEPROM)
+ * @hw: pointer to the HW structure
+ * @offset: the word offset to read
+ * @words: number of 16-bit words to read
+ * @data: pointer to the properly sized buffer for the data.
+ *
+ * Reads 16-bit chunks of data from the NVM (EEPROM). This is a function
+ * pointer entry point called by drivers.
+ **/
+s32 e1000_read_nvm(struct e1000_hw *hw, u16 offset, u16 words, u16 *data)
+{
+ if (hw->nvm.ops.read)
+ return hw->nvm.ops.read(hw, offset, words, data);
+
+ return -E1000_ERR_CONFIG;
+}
+
+/**
+ * e1000_write_nvm - Writes to NVM (EEPROM)
+ * @hw: pointer to the HW structure
+ * @offset: the word offset to read
+ * @words: number of 16-bit words to write
+ * @data: pointer to the properly sized buffer for the data.
+ *
+ * Writes 16-bit chunks of data to the NVM (EEPROM). This is a function
+ * pointer entry point called by drivers.
+ **/
+s32 e1000_write_nvm(struct e1000_hw *hw, u16 offset, u16 words, u16 *data)
+{
+ if (hw->nvm.ops.write)
+ return hw->nvm.ops.write(hw, offset, words, data);
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_write_8bit_ctrl_reg - Writes 8bit Control register
+ * @hw: pointer to the HW structure
+ * @reg: 32bit register offset
+ * @offset: the register to write
+ * @data: the value to write.
+ *
+ * Writes the PHY register at offset with the value in data.
+ * This is a function pointer entry point called by drivers.
+ **/
+s32 e1000_write_8bit_ctrl_reg(struct e1000_hw *hw, u32 reg, u32 offset,
+ u8 data)
+{
+ return e1000_write_8bit_ctrl_reg_generic(hw, reg, offset, data);
+}
+
+/**
+ * e1000_power_up_phy - Restores link in case of PHY power down
+ * @hw: pointer to the HW structure
+ *
+ * The phy may be powered down to save power, to turn off link when the
+ * driver is unloaded, or wake on lan is not enabled (among others).
+ **/
+void e1000_power_up_phy(struct e1000_hw *hw)
+{
+ if (hw->phy.ops.power_up)
+ hw->phy.ops.power_up(hw);
+
+ e1000_setup_link(hw);
+}
+
+/**
+ * e1000_power_down_phy - Power down PHY
+ * @hw: pointer to the HW structure
+ *
+ * The phy may be powered down to save power, to turn off link when the
+ * driver is unloaded, or wake on lan is not enabled (among others).
+ **/
+void e1000_power_down_phy(struct e1000_hw *hw)
+{
+ if (hw->phy.ops.power_down)
+ hw->phy.ops.power_down(hw);
+}
+
+/**
+ * e1000_power_up_fiber_serdes_link - Power up serdes link
+ * @hw: pointer to the HW structure
+ *
+ * Power on the optics and PCS.
+ **/
+void e1000_power_up_fiber_serdes_link(struct e1000_hw *hw)
+{
+ if (hw->mac.ops.power_up_serdes)
+ hw->mac.ops.power_up_serdes(hw);
+}
+
+/**
+ * e1000_shutdown_fiber_serdes_link - Remove link during power down
+ * @hw: pointer to the HW structure
+ *
+ * Shutdown the optics and PCS on driver unload.
+ **/
+void e1000_shutdown_fiber_serdes_link(struct e1000_hw *hw)
+{
+ if (hw->mac.ops.shutdown_serdes)
+ hw->mac.ops.shutdown_serdes(hw);
+}
+
+/**
+ * e1000_get_thermal_sensor_data - Gathers thermal sensor data
+ * @hw: pointer to hardware structure
+ *
+ * Updates the temperatures in mac.thermal_sensor_data
+ **/
+s32 e1000_get_thermal_sensor_data(struct e1000_hw *hw)
+{
+ if (hw->mac.ops.get_thermal_sensor_data)
+ return hw->mac.ops.get_thermal_sensor_data(hw);
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_init_thermal_sensor_thresh - Sets thermal sensor thresholds
+ * @hw: pointer to hardware structure
+ *
+ * Sets the thermal sensor thresholds according to the NVM map
+ **/
+s32 e1000_init_thermal_sensor_thresh(struct e1000_hw *hw)
+{
+ if (hw->mac.ops.init_thermal_sensor_thresh)
+ return hw->mac.ops.init_thermal_sensor_thresh(hw);
+
+ return E1000_SUCCESS;
+}
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.h
new file mode 100644
index 00000000..b21294ec
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.h
@@ -0,0 +1,157 @@
+/*******************************************************************************
+
+ Intel(R) Gigabit Ethernet Linux driver
+ Copyright(c) 2007-2013 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _E1000_API_H_
+#define _E1000_API_H_
+
+#include "e1000_hw.h"
+
+extern void e1000_init_function_pointers_82575(struct e1000_hw *hw);
+extern void e1000_rx_fifo_flush_82575(struct e1000_hw *hw);
+extern void e1000_init_function_pointers_vf(struct e1000_hw *hw);
+extern void e1000_power_up_fiber_serdes_link(struct e1000_hw *hw);
+extern void e1000_shutdown_fiber_serdes_link(struct e1000_hw *hw);
+extern void e1000_init_function_pointers_i210(struct e1000_hw *hw);
+
+s32 e1000_set_obff_timer(struct e1000_hw *hw, u32 itr);
+s32 e1000_set_mac_type(struct e1000_hw *hw);
+s32 e1000_setup_init_funcs(struct e1000_hw *hw, bool init_device);
+s32 e1000_init_mac_params(struct e1000_hw *hw);
+s32 e1000_init_nvm_params(struct e1000_hw *hw);
+s32 e1000_init_phy_params(struct e1000_hw *hw);
+s32 e1000_init_mbx_params(struct e1000_hw *hw);
+s32 e1000_get_bus_info(struct e1000_hw *hw);
+void e1000_clear_vfta(struct e1000_hw *hw);
+void e1000_write_vfta(struct e1000_hw *hw, u32 offset, u32 value);
+s32 e1000_force_mac_fc(struct e1000_hw *hw);
+s32 e1000_check_for_link(struct e1000_hw *hw);
+s32 e1000_reset_hw(struct e1000_hw *hw);
+s32 e1000_init_hw(struct e1000_hw *hw);
+s32 e1000_setup_link(struct e1000_hw *hw);
+s32 e1000_get_speed_and_duplex(struct e1000_hw *hw, u16 *speed, u16 *duplex);
+s32 e1000_disable_pcie_master(struct e1000_hw *hw);
+void e1000_config_collision_dist(struct e1000_hw *hw);
+void e1000_rar_set(struct e1000_hw *hw, u8 *addr, u32 index);
+u32 e1000_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr);
+void e1000_update_mc_addr_list(struct e1000_hw *hw, u8 *mc_addr_list,
+ u32 mc_addr_count);
+s32 e1000_setup_led(struct e1000_hw *hw);
+s32 e1000_cleanup_led(struct e1000_hw *hw);
+s32 e1000_check_reset_block(struct e1000_hw *hw);
+s32 e1000_blink_led(struct e1000_hw *hw);
+s32 e1000_led_on(struct e1000_hw *hw);
+s32 e1000_led_off(struct e1000_hw *hw);
+s32 e1000_id_led_init(struct e1000_hw *hw);
+void e1000_reset_adaptive(struct e1000_hw *hw);
+void e1000_update_adaptive(struct e1000_hw *hw);
+s32 e1000_get_cable_length(struct e1000_hw *hw);
+s32 e1000_validate_mdi_setting(struct e1000_hw *hw);
+s32 e1000_read_phy_reg(struct e1000_hw *hw, u32 offset, u16 *data);
+s32 e1000_write_phy_reg(struct e1000_hw *hw, u32 offset, u16 data);
+s32 e1000_write_8bit_ctrl_reg(struct e1000_hw *hw, u32 reg, u32 offset,
+ u8 data);
+s32 e1000_get_phy_info(struct e1000_hw *hw);
+void e1000_release_phy(struct e1000_hw *hw);
+s32 e1000_acquire_phy(struct e1000_hw *hw);
+s32 e1000_phy_hw_reset(struct e1000_hw *hw);
+s32 e1000_phy_commit(struct e1000_hw *hw);
+void e1000_power_up_phy(struct e1000_hw *hw);
+void e1000_power_down_phy(struct e1000_hw *hw);
+s32 e1000_read_mac_addr(struct e1000_hw *hw);
+s32 e1000_read_pba_string(struct e1000_hw *hw, u8 *pba_num, u32 pba_num_size);
+s32 e1000_read_pba_length(struct e1000_hw *hw, u32 *pba_num_size);
+void e1000_reload_nvm(struct e1000_hw *hw);
+s32 e1000_update_nvm_checksum(struct e1000_hw *hw);
+s32 e1000_validate_nvm_checksum(struct e1000_hw *hw);
+s32 e1000_read_nvm(struct e1000_hw *hw, u16 offset, u16 words, u16 *data);
+s32 e1000_read_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 *data);
+s32 e1000_write_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 data);
+s32 e1000_write_nvm(struct e1000_hw *hw, u16 offset, u16 words, u16 *data);
+s32 e1000_set_d3_lplu_state(struct e1000_hw *hw, bool active);
+s32 e1000_set_d0_lplu_state(struct e1000_hw *hw, bool active);
+bool e1000_check_mng_mode(struct e1000_hw *hw);
+bool e1000_enable_tx_pkt_filtering(struct e1000_hw *hw);
+s32 e1000_mng_enable_host_if(struct e1000_hw *hw);
+s32 e1000_mng_host_if_write(struct e1000_hw *hw, u8 *buffer, u16 length,
+ u16 offset, u8 *sum);
+s32 e1000_mng_write_cmd_header(struct e1000_hw *hw,
+ struct e1000_host_mng_command_header *hdr);
+s32 e1000_mng_write_dhcp_info(struct e1000_hw *hw, u8 *buffer, u16 length);
+s32 e1000_get_thermal_sensor_data(struct e1000_hw *hw);
+s32 e1000_init_thermal_sensor_thresh(struct e1000_hw *hw);
+
+
+
+/*
+ * TBI_ACCEPT macro definition:
+ *
+ * This macro requires:
+ * adapter = a pointer to struct e1000_hw
+ * status = the 8 bit status field of the Rx descriptor with EOP set
+ * error = the 8 bit error field of the Rx descriptor with EOP set
+ * length = the sum of all the length fields of the Rx descriptors that
+ * make up the current frame
+ * last_byte = the last byte of the frame DMAed by the hardware
+ * max_frame_length = the maximum frame length we want to accept.
+ * min_frame_length = the minimum frame length we want to accept.
+ *
+ * This macro is a conditional that should be used in the interrupt
+ * handler's Rx processing routine when RxErrors have been detected.
+ *
+ * Typical use:
+ * ...
+ * if (TBI_ACCEPT) {
+ * accept_frame = true;
+ * e1000_tbi_adjust_stats(adapter, MacAddress);
+ * frame_length--;
+ * } else {
+ * accept_frame = false;
+ * }
+ * ...
+ */
+
+/* The carrier extension symbol, as received by the NIC. */
+#define CARRIER_EXTENSION 0x0F
+
+#define TBI_ACCEPT(a, status, errors, length, last_byte, \
+ min_frame_size, max_frame_size) \
+ (e1000_tbi_sbp_enabled_82543(a) && \
+ (((errors) & E1000_RXD_ERR_FRAME_ERR_MASK) == E1000_RXD_ERR_CE) && \
+ ((last_byte) == CARRIER_EXTENSION) && \
+ (((status) & E1000_RXD_STAT_VP) ? \
+ (((length) > (min_frame_size - VLAN_TAG_SIZE)) && \
+ ((length) <= (max_frame_size + 1))) : \
+ (((length) > min_frame_size) && \
+ ((length) <= (max_frame_size + VLAN_TAG_SIZE + 1)))))
+
+#ifndef E1000_MAX
+#define E1000_MAX(a, b) ((a) > (b) ? (a) : (b))
+#endif
+#ifndef E1000_DIVIDE_ROUND_UP
+#define E1000_DIVIDE_ROUND_UP(a, b) (((a) + (b) - 1) / (b)) /* ceil(a/b) */
+#endif
+#endif /* _E1000_API_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_defines.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_defines.h
new file mode 100644
index 00000000..63b228c5
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_defines.h
@@ -0,0 +1,1380 @@
+/*******************************************************************************
+
+ Intel(R) Gigabit Ethernet Linux driver
+ Copyright(c) 2007-2013 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _E1000_DEFINES_H_
+#define _E1000_DEFINES_H_
+
+/* Number of Transmit and Receive Descriptors must be a multiple of 8 */
+#define REQ_TX_DESCRIPTOR_MULTIPLE 8
+#define REQ_RX_DESCRIPTOR_MULTIPLE 8
+
+/* Definitions for power management and wakeup registers */
+/* Wake Up Control */
+#define E1000_WUC_APME 0x00000001 /* APM Enable */
+#define E1000_WUC_PME_EN 0x00000002 /* PME Enable */
+#define E1000_WUC_PME_STATUS 0x00000004 /* PME Status */
+#define E1000_WUC_APMPME 0x00000008 /* Assert PME on APM Wakeup */
+#define E1000_WUC_PHY_WAKE 0x00000100 /* if PHY supports wakeup */
+
+/* Wake Up Filter Control */
+#define E1000_WUFC_LNKC 0x00000001 /* Link Status Change Wakeup Enable */
+#define E1000_WUFC_MAG 0x00000002 /* Magic Packet Wakeup Enable */
+#define E1000_WUFC_EX 0x00000004 /* Directed Exact Wakeup Enable */
+#define E1000_WUFC_MC 0x00000008 /* Directed Multicast Wakeup Enable */
+#define E1000_WUFC_BC 0x00000010 /* Broadcast Wakeup Enable */
+#define E1000_WUFC_ARP 0x00000020 /* ARP Request Packet Wakeup Enable */
+#define E1000_WUFC_IPV4 0x00000040 /* Directed IPv4 Packet Wakeup Enable */
+#define E1000_WUFC_FLX0 0x00010000 /* Flexible Filter 0 Enable */
+
+/* Wake Up Status */
+#define E1000_WUS_LNKC E1000_WUFC_LNKC
+#define E1000_WUS_MAG E1000_WUFC_MAG
+#define E1000_WUS_EX E1000_WUFC_EX
+#define E1000_WUS_MC E1000_WUFC_MC
+#define E1000_WUS_BC E1000_WUFC_BC
+
+/* Extended Device Control */
+#define E1000_CTRL_EXT_SDP4_DATA 0x00000010 /* SW Definable Pin 4 data */
+#define E1000_CTRL_EXT_SDP6_DATA 0x00000040 /* SW Definable Pin 6 data */
+#define E1000_CTRL_EXT_SDP3_DATA 0x00000080 /* SW Definable Pin 3 data */
+#define E1000_CTRL_EXT_SDP6_DIR 0x00000400 /* Direction of SDP6 0=in 1=out */
+#define E1000_CTRL_EXT_SDP3_DIR 0x00000800 /* Direction of SDP3 0=in 1=out */
+#define E1000_CTRL_EXT_EE_RST 0x00002000 /* Reinitialize from EEPROM */
+/* Physical Func Reset Done Indication */
+#define E1000_CTRL_EXT_PFRSTD 0x00004000
+#define E1000_CTRL_EXT_SPD_BYPS 0x00008000 /* Speed Select Bypass */
+#define E1000_CTRL_EXT_RO_DIS 0x00020000 /* Relaxed Ordering disable */
+#define E1000_CTRL_EXT_DMA_DYN_CLK_EN 0x00080000 /* DMA Dynamic Clk Gating */
+#define E1000_CTRL_EXT_LINK_MODE_MASK 0x00C00000
+/* Offset of the link mode field in Ctrl Ext register */
+#define E1000_CTRL_EXT_LINK_MODE_OFFSET 22
+#define E1000_CTRL_EXT_LINK_MODE_1000BASE_KX 0x00400000
+#define E1000_CTRL_EXT_LINK_MODE_GMII 0x00000000
+#define E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES 0x00C00000
+#define E1000_CTRL_EXT_LINK_MODE_SGMII 0x00800000
+#define E1000_CTRL_EXT_EIAME 0x01000000
+#define E1000_CTRL_EXT_IRCA 0x00000001
+#define E1000_CTRL_EXT_DRV_LOAD 0x10000000 /* Drv loaded bit for FW */
+#define E1000_CTRL_EXT_IAME 0x08000000 /* Int ACK Auto-mask */
+#define E1000_CTRL_EXT_PBA_CLR 0x80000000 /* PBA Clear */
+#define E1000_I2CCMD_REG_ADDR_SHIFT 16
+#define E1000_I2CCMD_PHY_ADDR_SHIFT 24
+#define E1000_I2CCMD_OPCODE_READ 0x08000000
+#define E1000_I2CCMD_OPCODE_WRITE 0x00000000
+#define E1000_I2CCMD_READY 0x20000000
+#define E1000_I2CCMD_ERROR 0x80000000
+#define E1000_I2CCMD_SFP_DATA_ADDR(a) (0x0000 + (a))
+#define E1000_I2CCMD_SFP_DIAG_ADDR(a) (0x0100 + (a))
+#define E1000_MAX_SGMII_PHY_REG_ADDR 255
+#define E1000_I2CCMD_PHY_TIMEOUT 200
+#define E1000_IVAR_VALID 0x80
+#define E1000_GPIE_NSICR 0x00000001
+#define E1000_GPIE_MSIX_MODE 0x00000010
+#define E1000_GPIE_EIAME 0x40000000
+#define E1000_GPIE_PBA 0x80000000
+
+/* Receive Descriptor bit definitions */
+#define E1000_RXD_STAT_DD 0x01 /* Descriptor Done */
+#define E1000_RXD_STAT_EOP 0x02 /* End of Packet */
+#define E1000_RXD_STAT_IXSM 0x04 /* Ignore checksum */
+#define E1000_RXD_STAT_VP 0x08 /* IEEE VLAN Packet */
+#define E1000_RXD_STAT_UDPCS 0x10 /* UDP xsum calculated */
+#define E1000_RXD_STAT_TCPCS 0x20 /* TCP xsum calculated */
+#define E1000_RXD_STAT_IPCS 0x40 /* IP xsum calculated */
+#define E1000_RXD_STAT_PIF 0x80 /* passed in-exact filter */
+#define E1000_RXD_STAT_IPIDV 0x200 /* IP identification valid */
+#define E1000_RXD_STAT_UDPV 0x400 /* Valid UDP checksum */
+#define E1000_RXD_STAT_DYNINT 0x800 /* Pkt caused INT via DYNINT */
+#define E1000_RXD_ERR_CE 0x01 /* CRC Error */
+#define E1000_RXD_ERR_SE 0x02 /* Symbol Error */
+#define E1000_RXD_ERR_SEQ 0x04 /* Sequence Error */
+#define E1000_RXD_ERR_CXE 0x10 /* Carrier Extension Error */
+#define E1000_RXD_ERR_TCPE 0x20 /* TCP/UDP Checksum Error */
+#define E1000_RXD_ERR_IPE 0x40 /* IP Checksum Error */
+#define E1000_RXD_ERR_RXE 0x80 /* Rx Data Error */
+#define E1000_RXD_SPC_VLAN_MASK 0x0FFF /* VLAN ID is in lower 12 bits */
+
+#define E1000_RXDEXT_STATERR_TST 0x00000100 /* Time Stamp taken */
+#define E1000_RXDEXT_STATERR_LB 0x00040000
+#define E1000_RXDEXT_STATERR_CE 0x01000000
+#define E1000_RXDEXT_STATERR_SE 0x02000000
+#define E1000_RXDEXT_STATERR_SEQ 0x04000000
+#define E1000_RXDEXT_STATERR_CXE 0x10000000
+#define E1000_RXDEXT_STATERR_TCPE 0x20000000
+#define E1000_RXDEXT_STATERR_IPE 0x40000000
+#define E1000_RXDEXT_STATERR_RXE 0x80000000
+
+/* mask to determine if packets should be dropped due to frame errors */
+#define E1000_RXD_ERR_FRAME_ERR_MASK ( \
+ E1000_RXD_ERR_CE | \
+ E1000_RXD_ERR_SE | \
+ E1000_RXD_ERR_SEQ | \
+ E1000_RXD_ERR_CXE | \
+ E1000_RXD_ERR_RXE)
+
+/* Same mask, but for extended and packet split descriptors */
+#define E1000_RXDEXT_ERR_FRAME_ERR_MASK ( \
+ E1000_RXDEXT_STATERR_CE | \
+ E1000_RXDEXT_STATERR_SE | \
+ E1000_RXDEXT_STATERR_SEQ | \
+ E1000_RXDEXT_STATERR_CXE | \
+ E1000_RXDEXT_STATERR_RXE)
+
+#define E1000_MRQC_RSS_FIELD_MASK 0xFFFF0000
+#define E1000_MRQC_RSS_FIELD_IPV4_TCP 0x00010000
+#define E1000_MRQC_RSS_FIELD_IPV4 0x00020000
+#define E1000_MRQC_RSS_FIELD_IPV6_TCP_EX 0x00040000
+#define E1000_MRQC_RSS_FIELD_IPV6 0x00100000
+#define E1000_MRQC_RSS_FIELD_IPV6_TCP 0x00200000
+
+#define E1000_RXDPS_HDRSTAT_HDRSP 0x00008000
+
+/* Management Control */
+#define E1000_MANC_SMBUS_EN 0x00000001 /* SMBus Enabled - RO */
+#define E1000_MANC_ASF_EN 0x00000002 /* ASF Enabled - RO */
+#define E1000_MANC_ARP_EN 0x00002000 /* Enable ARP Request Filtering */
+#define E1000_MANC_RCV_TCO_EN 0x00020000 /* Receive TCO Packets Enabled */
+#define E1000_MANC_BLK_PHY_RST_ON_IDE 0x00040000 /* Block phy resets */
+/* Enable MAC address filtering */
+#define E1000_MANC_EN_MAC_ADDR_FILTER 0x00100000
+/* Enable MNG packets to host memory */
+#define E1000_MANC_EN_MNG2HOST 0x00200000
+
+#define E1000_MANC2H_PORT_623 0x00000020 /* Port 0x26f */
+#define E1000_MANC2H_PORT_664 0x00000040 /* Port 0x298 */
+#define E1000_MDEF_PORT_623 0x00000800 /* Port 0x26f */
+#define E1000_MDEF_PORT_664 0x00000400 /* Port 0x298 */
+
+/* Receive Control */
+#define E1000_RCTL_RST 0x00000001 /* Software reset */
+#define E1000_RCTL_EN 0x00000002 /* enable */
+#define E1000_RCTL_SBP 0x00000004 /* store bad packet */
+#define E1000_RCTL_UPE 0x00000008 /* unicast promisc enable */
+#define E1000_RCTL_MPE 0x00000010 /* multicast promisc enable */
+#define E1000_RCTL_LPE 0x00000020 /* long packet enable */
+#define E1000_RCTL_LBM_NO 0x00000000 /* no loopback mode */
+#define E1000_RCTL_LBM_MAC 0x00000040 /* MAC loopback mode */
+#define E1000_RCTL_LBM_TCVR 0x000000C0 /* tcvr loopback mode */
+#define E1000_RCTL_DTYP_PS 0x00000400 /* Packet Split descriptor */
+#define E1000_RCTL_RDMTS_HALF 0x00000000 /* Rx desc min thresh size */
+#define E1000_RCTL_MO_SHIFT 12 /* multicast offset shift */
+#define E1000_RCTL_MO_3 0x00003000 /* multicast offset 15:4 */
+#define E1000_RCTL_BAM 0x00008000 /* broadcast enable */
+/* these buffer sizes are valid if E1000_RCTL_BSEX is 0 */
+#define E1000_RCTL_SZ_2048 0x00000000 /* Rx buffer size 2048 */
+#define E1000_RCTL_SZ_1024 0x00010000 /* Rx buffer size 1024 */
+#define E1000_RCTL_SZ_512 0x00020000 /* Rx buffer size 512 */
+#define E1000_RCTL_SZ_256 0x00030000 /* Rx buffer size 256 */
+/* these buffer sizes are valid if E1000_RCTL_BSEX is 1 */
+#define E1000_RCTL_SZ_16384 0x00010000 /* Rx buffer size 16384 */
+#define E1000_RCTL_SZ_8192 0x00020000 /* Rx buffer size 8192 */
+#define E1000_RCTL_SZ_4096 0x00030000 /* Rx buffer size 4096 */
+#define E1000_RCTL_VFE 0x00040000 /* vlan filter enable */
+#define E1000_RCTL_CFIEN 0x00080000 /* canonical form enable */
+#define E1000_RCTL_CFI 0x00100000 /* canonical form indicator */
+#define E1000_RCTL_DPF 0x00400000 /* discard pause frames */
+#define E1000_RCTL_PMCF 0x00800000 /* pass MAC control frames */
+#define E1000_RCTL_BSEX 0x02000000 /* Buffer size extension */
+#define E1000_RCTL_SECRC 0x04000000 /* Strip Ethernet CRC */
+
+/* Use byte values for the following shift parameters
+ * Usage:
+ * psrctl |= (((ROUNDUP(value0, 128) >> E1000_PSRCTL_BSIZE0_SHIFT) &
+ * E1000_PSRCTL_BSIZE0_MASK) |
+ * ((ROUNDUP(value1, 1024) >> E1000_PSRCTL_BSIZE1_SHIFT) &
+ * E1000_PSRCTL_BSIZE1_MASK) |
+ * ((ROUNDUP(value2, 1024) << E1000_PSRCTL_BSIZE2_SHIFT) &
+ * E1000_PSRCTL_BSIZE2_MASK) |
+ * ((ROUNDUP(value3, 1024) << E1000_PSRCTL_BSIZE3_SHIFT) |;
+ * E1000_PSRCTL_BSIZE3_MASK))
+ * where value0 = [128..16256], default=256
+ * value1 = [1024..64512], default=4096
+ * value2 = [0..64512], default=4096
+ * value3 = [0..64512], default=0
+ */
+
+#define E1000_PSRCTL_BSIZE0_MASK 0x0000007F
+#define E1000_PSRCTL_BSIZE1_MASK 0x00003F00
+#define E1000_PSRCTL_BSIZE2_MASK 0x003F0000
+#define E1000_PSRCTL_BSIZE3_MASK 0x3F000000
+
+#define E1000_PSRCTL_BSIZE0_SHIFT 7 /* Shift _right_ 7 */
+#define E1000_PSRCTL_BSIZE1_SHIFT 2 /* Shift _right_ 2 */
+#define E1000_PSRCTL_BSIZE2_SHIFT 6 /* Shift _left_ 6 */
+#define E1000_PSRCTL_BSIZE3_SHIFT 14 /* Shift _left_ 14 */
+
+/* SWFW_SYNC Definitions */
+#define E1000_SWFW_EEP_SM 0x01
+#define E1000_SWFW_PHY0_SM 0x02
+#define E1000_SWFW_PHY1_SM 0x04
+#define E1000_SWFW_CSR_SM 0x08
+#define E1000_SWFW_PHY2_SM 0x20
+#define E1000_SWFW_PHY3_SM 0x40
+#define E1000_SWFW_SW_MNG_SM 0x400
+
+/* Device Control */
+#define E1000_CTRL_FD 0x00000001 /* Full duplex.0=half; 1=full */
+#define E1000_CTRL_PRIOR 0x00000004 /* Priority on PCI. 0=rx,1=fair */
+#define E1000_CTRL_GIO_MASTER_DISABLE 0x00000004 /*Blocks new Master reqs */
+#define E1000_CTRL_LRST 0x00000008 /* Link reset. 0=normal,1=reset */
+#define E1000_CTRL_ASDE 0x00000020 /* Auto-speed detect enable */
+#define E1000_CTRL_SLU 0x00000040 /* Set link up (Force Link) */
+#define E1000_CTRL_ILOS 0x00000080 /* Invert Loss-Of Signal */
+#define E1000_CTRL_SPD_SEL 0x00000300 /* Speed Select Mask */
+#define E1000_CTRL_SPD_10 0x00000000 /* Force 10Mb */
+#define E1000_CTRL_SPD_100 0x00000100 /* Force 100Mb */
+#define E1000_CTRL_SPD_1000 0x00000200 /* Force 1Gb */
+#define E1000_CTRL_FRCSPD 0x00000800 /* Force Speed */
+#define E1000_CTRL_FRCDPX 0x00001000 /* Force Duplex */
+#define E1000_CTRL_SWDPIN0 0x00040000 /* SWDPIN 0 value */
+#define E1000_CTRL_SWDPIN1 0x00080000 /* SWDPIN 1 value */
+#define E1000_CTRL_SWDPIN2 0x00100000 /* SWDPIN 2 value */
+#define E1000_CTRL_ADVD3WUC 0x00100000 /* D3 WUC */
+#define E1000_CTRL_SWDPIN3 0x00200000 /* SWDPIN 3 value */
+#define E1000_CTRL_SWDPIO0 0x00400000 /* SWDPIN 0 Input or output */
+#define E1000_CTRL_RST 0x04000000 /* Global reset */
+#define E1000_CTRL_RFCE 0x08000000 /* Receive Flow Control enable */
+#define E1000_CTRL_TFCE 0x10000000 /* Transmit flow control enable */
+#define E1000_CTRL_VME 0x40000000 /* IEEE VLAN mode enable */
+#define E1000_CTRL_PHY_RST 0x80000000 /* PHY Reset */
+#define E1000_CTRL_I2C_ENA 0x02000000 /* I2C enable */
+
+
+#define E1000_CONNSW_ENRGSRC 0x4
+#define E1000_CONNSW_PHYSD 0x400
+#define E1000_CONNSW_PHY_PDN 0x800
+#define E1000_CONNSW_SERDESD 0x200
+#define E1000_CONNSW_AUTOSENSE_CONF 0x2
+#define E1000_CONNSW_AUTOSENSE_EN 0x1
+#define E1000_PCS_CFG_PCS_EN 8
+#define E1000_PCS_LCTL_FLV_LINK_UP 1
+#define E1000_PCS_LCTL_FSV_10 0
+#define E1000_PCS_LCTL_FSV_100 2
+#define E1000_PCS_LCTL_FSV_1000 4
+#define E1000_PCS_LCTL_FDV_FULL 8
+#define E1000_PCS_LCTL_FSD 0x10
+#define E1000_PCS_LCTL_FORCE_LINK 0x20
+#define E1000_PCS_LCTL_FORCE_FCTRL 0x80
+#define E1000_PCS_LCTL_AN_ENABLE 0x10000
+#define E1000_PCS_LCTL_AN_RESTART 0x20000
+#define E1000_PCS_LCTL_AN_TIMEOUT 0x40000
+#define E1000_ENABLE_SERDES_LOOPBACK 0x0410
+
+#define E1000_PCS_LSTS_LINK_OK 1
+#define E1000_PCS_LSTS_SPEED_100 2
+#define E1000_PCS_LSTS_SPEED_1000 4
+#define E1000_PCS_LSTS_DUPLEX_FULL 8
+#define E1000_PCS_LSTS_SYNK_OK 0x10
+#define E1000_PCS_LSTS_AN_COMPLETE 0x10000
+
+/* Device Status */
+#define E1000_STATUS_FD 0x00000001 /* Duplex 0=half 1=full */
+#define E1000_STATUS_LU 0x00000002 /* Link up.0=no,1=link */
+#define E1000_STATUS_FUNC_MASK 0x0000000C /* PCI Function Mask */
+#define E1000_STATUS_FUNC_SHIFT 2
+#define E1000_STATUS_FUNC_1 0x00000004 /* Function 1 */
+#define E1000_STATUS_TXOFF 0x00000010 /* transmission paused */
+#define E1000_STATUS_SPEED_MASK 0x000000C0
+#define E1000_STATUS_SPEED_10 0x00000000 /* Speed 10Mb/s */
+#define E1000_STATUS_SPEED_100 0x00000040 /* Speed 100Mb/s */
+#define E1000_STATUS_SPEED_1000 0x00000080 /* Speed 1000Mb/s */
+#define E1000_STATUS_LAN_INIT_DONE 0x00000200 /* Lan Init Compltn by NVM */
+#define E1000_STATUS_PHYRA 0x00000400 /* PHY Reset Asserted */
+#define E1000_STATUS_GIO_MASTER_ENABLE 0x00080000 /* Master request status */
+#define E1000_STATUS_2P5_SKU 0x00001000 /* Val of 2.5GBE SKU strap */
+#define E1000_STATUS_2P5_SKU_OVER 0x00002000 /* Val of 2.5GBE SKU Over */
+
+#define SPEED_10 10
+#define SPEED_100 100
+#define SPEED_1000 1000
+#define SPEED_2500 2500
+#define HALF_DUPLEX 1
+#define FULL_DUPLEX 2
+
+
+#define ADVERTISE_10_HALF 0x0001
+#define ADVERTISE_10_FULL 0x0002
+#define ADVERTISE_100_HALF 0x0004
+#define ADVERTISE_100_FULL 0x0008
+#define ADVERTISE_1000_HALF 0x0010 /* Not used, just FYI */
+#define ADVERTISE_1000_FULL 0x0020
+
+/* 1000/H is not supported, nor spec-compliant. */
+#define E1000_ALL_SPEED_DUPLEX ( \
+ ADVERTISE_10_HALF | ADVERTISE_10_FULL | ADVERTISE_100_HALF | \
+ ADVERTISE_100_FULL | ADVERTISE_1000_FULL)
+#define E1000_ALL_NOT_GIG ( \
+ ADVERTISE_10_HALF | ADVERTISE_10_FULL | ADVERTISE_100_HALF | \
+ ADVERTISE_100_FULL)
+#define E1000_ALL_100_SPEED (ADVERTISE_100_HALF | ADVERTISE_100_FULL)
+#define E1000_ALL_10_SPEED (ADVERTISE_10_HALF | ADVERTISE_10_FULL)
+#define E1000_ALL_HALF_DUPLEX (ADVERTISE_10_HALF | ADVERTISE_100_HALF)
+
+#define AUTONEG_ADVERTISE_SPEED_DEFAULT E1000_ALL_SPEED_DUPLEX
+
+/* LED Control */
+#define E1000_LEDCTL_LED0_MODE_MASK 0x0000000F
+#define E1000_LEDCTL_LED0_MODE_SHIFT 0
+#define E1000_LEDCTL_LED0_IVRT 0x00000040
+#define E1000_LEDCTL_LED0_BLINK 0x00000080
+
+#define E1000_LEDCTL_MODE_LED_ON 0xE
+#define E1000_LEDCTL_MODE_LED_OFF 0xF
+
+/* Transmit Descriptor bit definitions */
+#define E1000_TXD_DTYP_D 0x00100000 /* Data Descriptor */
+#define E1000_TXD_DTYP_C 0x00000000 /* Context Descriptor */
+#define E1000_TXD_POPTS_IXSM 0x01 /* Insert IP checksum */
+#define E1000_TXD_POPTS_TXSM 0x02 /* Insert TCP/UDP checksum */
+#define E1000_TXD_CMD_EOP 0x01000000 /* End of Packet */
+#define E1000_TXD_CMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */
+#define E1000_TXD_CMD_IC 0x04000000 /* Insert Checksum */
+#define E1000_TXD_CMD_RS 0x08000000 /* Report Status */
+#define E1000_TXD_CMD_RPS 0x10000000 /* Report Packet Sent */
+#define E1000_TXD_CMD_DEXT 0x20000000 /* Desc extension (0 = legacy) */
+#define E1000_TXD_CMD_VLE 0x40000000 /* Add VLAN tag */
+#define E1000_TXD_CMD_IDE 0x80000000 /* Enable Tidv register */
+#define E1000_TXD_STAT_DD 0x00000001 /* Descriptor Done */
+#define E1000_TXD_STAT_EC 0x00000002 /* Excess Collisions */
+#define E1000_TXD_STAT_LC 0x00000004 /* Late Collisions */
+#define E1000_TXD_STAT_TU 0x00000008 /* Transmit underrun */
+#define E1000_TXD_CMD_TCP 0x01000000 /* TCP packet */
+#define E1000_TXD_CMD_IP 0x02000000 /* IP packet */
+#define E1000_TXD_CMD_TSE 0x04000000 /* TCP Seg enable */
+#define E1000_TXD_STAT_TC 0x00000004 /* Tx Underrun */
+#define E1000_TXD_EXTCMD_TSTAMP 0x00000010 /* IEEE1588 Timestamp packet */
+
+/* Transmit Control */
+#define E1000_TCTL_EN 0x00000002 /* enable Tx */
+#define E1000_TCTL_PSP 0x00000008 /* pad short packets */
+#define E1000_TCTL_CT 0x00000ff0 /* collision threshold */
+#define E1000_TCTL_COLD 0x003ff000 /* collision distance */
+#define E1000_TCTL_RTLC 0x01000000 /* Re-transmit on late collision */
+#define E1000_TCTL_MULR 0x10000000 /* Multiple request support */
+
+/* Transmit Arbitration Count */
+#define E1000_TARC0_ENABLE 0x00000400 /* Enable Tx Queue 0 */
+
+/* SerDes Control */
+#define E1000_SCTL_DISABLE_SERDES_LOOPBACK 0x0400
+#define E1000_SCTL_ENABLE_SERDES_LOOPBACK 0x0410
+
+/* Receive Checksum Control */
+#define E1000_RXCSUM_IPOFL 0x00000100 /* IPv4 checksum offload */
+#define E1000_RXCSUM_TUOFL 0x00000200 /* TCP / UDP checksum offload */
+#define E1000_RXCSUM_CRCOFL 0x00000800 /* CRC32 offload enable */
+#define E1000_RXCSUM_IPPCSE 0x00001000 /* IP payload checksum enable */
+#define E1000_RXCSUM_PCSD 0x00002000 /* packet checksum disabled */
+
+/* Header split receive */
+#define E1000_RFCTL_NFSW_DIS 0x00000040
+#define E1000_RFCTL_NFSR_DIS 0x00000080
+#define E1000_RFCTL_ACK_DIS 0x00001000
+#define E1000_RFCTL_EXTEN 0x00008000
+#define E1000_RFCTL_IPV6_EX_DIS 0x00010000
+#define E1000_RFCTL_NEW_IPV6_EXT_DIS 0x00020000
+#define E1000_RFCTL_LEF 0x00040000
+
+/* Collision related configuration parameters */
+#define E1000_COLLISION_THRESHOLD 15
+#define E1000_CT_SHIFT 4
+#define E1000_COLLISION_DISTANCE 63
+#define E1000_COLD_SHIFT 12
+
+/* Default values for the transmit IPG register */
+#define DEFAULT_82543_TIPG_IPGT_FIBER 9
+#define DEFAULT_82543_TIPG_IPGT_COPPER 8
+
+#define E1000_TIPG_IPGT_MASK 0x000003FF
+
+#define DEFAULT_82543_TIPG_IPGR1 8
+#define E1000_TIPG_IPGR1_SHIFT 10
+
+#define DEFAULT_82543_TIPG_IPGR2 6
+#define DEFAULT_80003ES2LAN_TIPG_IPGR2 7
+#define E1000_TIPG_IPGR2_SHIFT 20
+
+/* Ethertype field values */
+#define ETHERNET_IEEE_VLAN_TYPE 0x8100 /* 802.3ac packet */
+
+#define ETHERNET_FCS_SIZE 4
+#define MAX_JUMBO_FRAME_SIZE 0x3F00
+
+/* Extended Configuration Control and Size */
+#define E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP 0x00000020
+#define E1000_EXTCNF_CTRL_LCD_WRITE_ENABLE 0x00000001
+#define E1000_EXTCNF_CTRL_OEM_WRITE_ENABLE 0x00000008
+#define E1000_EXTCNF_CTRL_SWFLAG 0x00000020
+#define E1000_EXTCNF_CTRL_GATE_PHY_CFG 0x00000080
+#define E1000_EXTCNF_SIZE_EXT_PCIE_LENGTH_MASK 0x00FF0000
+#define E1000_EXTCNF_SIZE_EXT_PCIE_LENGTH_SHIFT 16
+#define E1000_EXTCNF_CTRL_EXT_CNF_POINTER_MASK 0x0FFF0000
+#define E1000_EXTCNF_CTRL_EXT_CNF_POINTER_SHIFT 16
+
+#define E1000_PHY_CTRL_D0A_LPLU 0x00000002
+#define E1000_PHY_CTRL_NOND0A_LPLU 0x00000004
+#define E1000_PHY_CTRL_NOND0A_GBE_DISABLE 0x00000008
+#define E1000_PHY_CTRL_GBE_DISABLE 0x00000040
+
+#define E1000_KABGTXD_BGSQLBIAS 0x00050000
+
+/* PBA constants */
+#define E1000_PBA_8K 0x0008 /* 8KB */
+#define E1000_PBA_10K 0x000A /* 10KB */
+#define E1000_PBA_12K 0x000C /* 12KB */
+#define E1000_PBA_14K 0x000E /* 14KB */
+#define E1000_PBA_16K 0x0010 /* 16KB */
+#define E1000_PBA_18K 0x0012
+#define E1000_PBA_20K 0x0014
+#define E1000_PBA_22K 0x0016
+#define E1000_PBA_24K 0x0018
+#define E1000_PBA_26K 0x001A
+#define E1000_PBA_30K 0x001E
+#define E1000_PBA_32K 0x0020
+#define E1000_PBA_34K 0x0022
+#define E1000_PBA_35K 0x0023
+#define E1000_PBA_38K 0x0026
+#define E1000_PBA_40K 0x0028
+#define E1000_PBA_48K 0x0030 /* 48KB */
+#define E1000_PBA_64K 0x0040 /* 64KB */
+
+#define E1000_PBA_RXA_MASK 0xFFFF
+
+#define E1000_PBS_16K E1000_PBA_16K
+
+#define IFS_MAX 80
+#define IFS_MIN 40
+#define IFS_RATIO 4
+#define IFS_STEP 10
+#define MIN_NUM_XMITS 1000
+
+/* SW Semaphore Register */
+#define E1000_SWSM_SMBI 0x00000001 /* Driver Semaphore bit */
+#define E1000_SWSM_SWESMBI 0x00000002 /* FW Semaphore bit */
+#define E1000_SWSM_DRV_LOAD 0x00000008 /* Driver Loaded Bit */
+
+#define E1000_SWSM2_LOCK 0x00000002 /* Secondary driver semaphore bit */
+
+/* Interrupt Cause Read */
+#define E1000_ICR_TXDW 0x00000001 /* Transmit desc written back */
+#define E1000_ICR_TXQE 0x00000002 /* Transmit Queue empty */
+#define E1000_ICR_LSC 0x00000004 /* Link Status Change */
+#define E1000_ICR_RXSEQ 0x00000008 /* Rx sequence error */
+#define E1000_ICR_RXDMT0 0x00000010 /* Rx desc min. threshold (0) */
+#define E1000_ICR_RXO 0x00000040 /* Rx overrun */
+#define E1000_ICR_RXT0 0x00000080 /* Rx timer intr (ring 0) */
+#define E1000_ICR_VMMB 0x00000100 /* VM MB event */
+#define E1000_ICR_RXCFG 0x00000400 /* Rx /c/ ordered set */
+#define E1000_ICR_GPI_EN0 0x00000800 /* GP Int 0 */
+#define E1000_ICR_GPI_EN1 0x00001000 /* GP Int 1 */
+#define E1000_ICR_GPI_EN2 0x00002000 /* GP Int 2 */
+#define E1000_ICR_GPI_EN3 0x00004000 /* GP Int 3 */
+#define E1000_ICR_TXD_LOW 0x00008000
+#define E1000_ICR_MNG 0x00040000 /* Manageability event */
+#define E1000_ICR_TS 0x00080000 /* Time Sync Interrupt */
+#define E1000_ICR_DRSTA 0x40000000 /* Device Reset Asserted */
+/* If this bit asserted, the driver should claim the interrupt */
+#define E1000_ICR_INT_ASSERTED 0x80000000
+#define E1000_ICR_DOUTSYNC 0x10000000 /* NIC DMA out of sync */
+#define E1000_ICR_FER 0x00400000 /* Fatal Error */
+
+#define E1000_ICR_THS 0x00800000 /* ICR.THS: Thermal Sensor Event*/
+#define E1000_ICR_MDDET 0x10000000 /* Malicious Driver Detect */
+
+
+/* Extended Interrupt Cause Read */
+#define E1000_EICR_RX_QUEUE0 0x00000001 /* Rx Queue 0 Interrupt */
+#define E1000_EICR_RX_QUEUE1 0x00000002 /* Rx Queue 1 Interrupt */
+#define E1000_EICR_RX_QUEUE2 0x00000004 /* Rx Queue 2 Interrupt */
+#define E1000_EICR_RX_QUEUE3 0x00000008 /* Rx Queue 3 Interrupt */
+#define E1000_EICR_TX_QUEUE0 0x00000100 /* Tx Queue 0 Interrupt */
+#define E1000_EICR_TX_QUEUE1 0x00000200 /* Tx Queue 1 Interrupt */
+#define E1000_EICR_TX_QUEUE2 0x00000400 /* Tx Queue 2 Interrupt */
+#define E1000_EICR_TX_QUEUE3 0x00000800 /* Tx Queue 3 Interrupt */
+#define E1000_EICR_TCP_TIMER 0x40000000 /* TCP Timer */
+#define E1000_EICR_OTHER 0x80000000 /* Interrupt Cause Active */
+/* TCP Timer */
+#define E1000_TCPTIMER_KS 0x00000100 /* KickStart */
+#define E1000_TCPTIMER_COUNT_ENABLE 0x00000200 /* Count Enable */
+#define E1000_TCPTIMER_COUNT_FINISH 0x00000400 /* Count finish */
+#define E1000_TCPTIMER_LOOP 0x00000800 /* Loop */
+
+/* This defines the bits that are set in the Interrupt Mask
+ * Set/Read Register. Each bit is documented below:
+ * o RXT0 = Receiver Timer Interrupt (ring 0)
+ * o TXDW = Transmit Descriptor Written Back
+ * o RXDMT0 = Receive Descriptor Minimum Threshold hit (ring 0)
+ * o RXSEQ = Receive Sequence Error
+ * o LSC = Link Status Change
+ */
+#define IMS_ENABLE_MASK ( \
+ E1000_IMS_RXT0 | \
+ E1000_IMS_TXDW | \
+ E1000_IMS_RXDMT0 | \
+ E1000_IMS_RXSEQ | \
+ E1000_IMS_LSC)
+
+/* Interrupt Mask Set */
+#define E1000_IMS_TXDW E1000_ICR_TXDW /* Tx desc written back */
+#define E1000_IMS_TXQE E1000_ICR_TXQE /* Transmit Queue empty */
+#define E1000_IMS_LSC E1000_ICR_LSC /* Link Status Change */
+#define E1000_IMS_VMMB E1000_ICR_VMMB /* Mail box activity */
+#define E1000_IMS_RXSEQ E1000_ICR_RXSEQ /* Rx sequence error */
+#define E1000_IMS_RXDMT0 E1000_ICR_RXDMT0 /* Rx desc min. threshold */
+#define E1000_IMS_RXO E1000_ICR_RXO /* Rx overrun */
+#define E1000_IMS_RXT0 E1000_ICR_RXT0 /* Rx timer intr */
+#define E1000_IMS_TXD_LOW E1000_ICR_TXD_LOW
+#define E1000_IMS_TS E1000_ICR_TS /* Time Sync Interrupt */
+#define E1000_IMS_DRSTA E1000_ICR_DRSTA /* Device Reset Asserted */
+#define E1000_IMS_DOUTSYNC E1000_ICR_DOUTSYNC /* NIC DMA out of sync */
+#define E1000_IMS_FER E1000_ICR_FER /* Fatal Error */
+
+#define E1000_IMS_THS E1000_ICR_THS /* ICR.TS: Thermal Sensor Event*/
+#define E1000_IMS_MDDET E1000_ICR_MDDET /* Malicious Driver Detect */
+/* Extended Interrupt Mask Set */
+#define E1000_EIMS_RX_QUEUE0 E1000_EICR_RX_QUEUE0 /* Rx Queue 0 Interrupt */
+#define E1000_EIMS_RX_QUEUE1 E1000_EICR_RX_QUEUE1 /* Rx Queue 1 Interrupt */
+#define E1000_EIMS_RX_QUEUE2 E1000_EICR_RX_QUEUE2 /* Rx Queue 2 Interrupt */
+#define E1000_EIMS_RX_QUEUE3 E1000_EICR_RX_QUEUE3 /* Rx Queue 3 Interrupt */
+#define E1000_EIMS_TX_QUEUE0 E1000_EICR_TX_QUEUE0 /* Tx Queue 0 Interrupt */
+#define E1000_EIMS_TX_QUEUE1 E1000_EICR_TX_QUEUE1 /* Tx Queue 1 Interrupt */
+#define E1000_EIMS_TX_QUEUE2 E1000_EICR_TX_QUEUE2 /* Tx Queue 2 Interrupt */
+#define E1000_EIMS_TX_QUEUE3 E1000_EICR_TX_QUEUE3 /* Tx Queue 3 Interrupt */
+#define E1000_EIMS_TCP_TIMER E1000_EICR_TCP_TIMER /* TCP Timer */
+#define E1000_EIMS_OTHER E1000_EICR_OTHER /* Interrupt Cause Active */
+
+/* Interrupt Cause Set */
+#define E1000_ICS_LSC E1000_ICR_LSC /* Link Status Change */
+#define E1000_ICS_RXSEQ E1000_ICR_RXSEQ /* Rx sequence error */
+#define E1000_ICS_RXDMT0 E1000_ICR_RXDMT0 /* Rx desc min. threshold */
+
+/* Extended Interrupt Cause Set */
+#define E1000_EICS_RX_QUEUE0 E1000_EICR_RX_QUEUE0 /* Rx Queue 0 Interrupt */
+#define E1000_EICS_RX_QUEUE1 E1000_EICR_RX_QUEUE1 /* Rx Queue 1 Interrupt */
+#define E1000_EICS_RX_QUEUE2 E1000_EICR_RX_QUEUE2 /* Rx Queue 2 Interrupt */
+#define E1000_EICS_RX_QUEUE3 E1000_EICR_RX_QUEUE3 /* Rx Queue 3 Interrupt */
+#define E1000_EICS_TX_QUEUE0 E1000_EICR_TX_QUEUE0 /* Tx Queue 0 Interrupt */
+#define E1000_EICS_TX_QUEUE1 E1000_EICR_TX_QUEUE1 /* Tx Queue 1 Interrupt */
+#define E1000_EICS_TX_QUEUE2 E1000_EICR_TX_QUEUE2 /* Tx Queue 2 Interrupt */
+#define E1000_EICS_TX_QUEUE3 E1000_EICR_TX_QUEUE3 /* Tx Queue 3 Interrupt */
+#define E1000_EICS_TCP_TIMER E1000_EICR_TCP_TIMER /* TCP Timer */
+#define E1000_EICS_OTHER E1000_EICR_OTHER /* Interrupt Cause Active */
+
+#define E1000_EITR_ITR_INT_MASK 0x0000FFFF
+/* E1000_EITR_CNT_IGNR is only for 82576 and newer */
+#define E1000_EITR_CNT_IGNR 0x80000000 /* Don't reset counters on write */
+#define E1000_EITR_INTERVAL 0x00007FFC
+
+/* Transmit Descriptor Control */
+#define E1000_TXDCTL_PTHRESH 0x0000003F /* TXDCTL Prefetch Threshold */
+#define E1000_TXDCTL_HTHRESH 0x00003F00 /* TXDCTL Host Threshold */
+#define E1000_TXDCTL_WTHRESH 0x003F0000 /* TXDCTL Writeback Threshold */
+#define E1000_TXDCTL_GRAN 0x01000000 /* TXDCTL Granularity */
+#define E1000_TXDCTL_FULL_TX_DESC_WB 0x01010000 /* GRAN=1, WTHRESH=1 */
+#define E1000_TXDCTL_MAX_TX_DESC_PREFETCH 0x0100001F /* GRAN=1, PTHRESH=31 */
+/* Enable the counting of descriptors still to be processed. */
+#define E1000_TXDCTL_COUNT_DESC 0x00400000
+
+/* Flow Control Constants */
+#define FLOW_CONTROL_ADDRESS_LOW 0x00C28001
+#define FLOW_CONTROL_ADDRESS_HIGH 0x00000100
+#define FLOW_CONTROL_TYPE 0x8808
+
+/* 802.1q VLAN Packet Size */
+#define VLAN_TAG_SIZE 4 /* 802.3ac tag (not DMA'd) */
+#define E1000_VLAN_FILTER_TBL_SIZE 128 /* VLAN Filter Table (4096 bits) */
+
+/* Receive Address
+ * Number of high/low register pairs in the RAR. The RAR (Receive Address
+ * Registers) holds the directed and multicast addresses that we monitor.
+ * Technically, we have 16 spots. However, we reserve one of these spots
+ * (RAR[15]) for our directed address used by controllers with
+ * manageability enabled, allowing us room for 15 multicast addresses.
+ */
+#define E1000_RAR_ENTRIES 15
+#define E1000_RAH_AV 0x80000000 /* Receive descriptor valid */
+#define E1000_RAL_MAC_ADDR_LEN 4
+#define E1000_RAH_MAC_ADDR_LEN 2
+#define E1000_RAH_QUEUE_MASK_82575 0x000C0000
+#define E1000_RAH_POOL_1 0x00040000
+
+/* Error Codes */
+#define E1000_SUCCESS 0
+#define E1000_ERR_NVM 1
+#define E1000_ERR_PHY 2
+#define E1000_ERR_CONFIG 3
+#define E1000_ERR_PARAM 4
+#define E1000_ERR_MAC_INIT 5
+#define E1000_ERR_PHY_TYPE 6
+#define E1000_ERR_RESET 9
+#define E1000_ERR_MASTER_REQUESTS_PENDING 10
+#define E1000_ERR_HOST_INTERFACE_COMMAND 11
+#define E1000_BLK_PHY_RESET 12
+#define E1000_ERR_SWFW_SYNC 13
+#define E1000_NOT_IMPLEMENTED 14
+#define E1000_ERR_MBX 15
+#define E1000_ERR_INVALID_ARGUMENT 16
+#define E1000_ERR_NO_SPACE 17
+#define E1000_ERR_NVM_PBA_SECTION 18
+#define E1000_ERR_I2C 19
+#define E1000_ERR_INVM_VALUE_NOT_FOUND 20
+
+/* Loop limit on how long we wait for auto-negotiation to complete */
+#define FIBER_LINK_UP_LIMIT 50
+#define COPPER_LINK_UP_LIMIT 10
+#define PHY_AUTO_NEG_LIMIT 45
+#define PHY_FORCE_LIMIT 20
+/* Number of 100 microseconds we wait for PCI Express master disable */
+#define MASTER_DISABLE_TIMEOUT 800
+/* Number of milliseconds we wait for PHY configuration done after MAC reset */
+#define PHY_CFG_TIMEOUT 100
+/* Number of 2 milliseconds we wait for acquiring MDIO ownership. */
+#define MDIO_OWNERSHIP_TIMEOUT 10
+/* Number of milliseconds for NVM auto read done after MAC reset. */
+#define AUTO_READ_DONE_TIMEOUT 10
+
+/* Flow Control */
+#define E1000_FCRTH_RTH 0x0000FFF8 /* Mask Bits[15:3] for RTH */
+#define E1000_FCRTL_RTL 0x0000FFF8 /* Mask Bits[15:3] for RTL */
+#define E1000_FCRTL_XONE 0x80000000 /* Enable XON frame transmission */
+
+/* Transmit Configuration Word */
+#define E1000_TXCW_FD 0x00000020 /* TXCW full duplex */
+#define E1000_TXCW_PAUSE 0x00000080 /* TXCW sym pause request */
+#define E1000_TXCW_ASM_DIR 0x00000100 /* TXCW astm pause direction */
+#define E1000_TXCW_PAUSE_MASK 0x00000180 /* TXCW pause request mask */
+#define E1000_TXCW_ANE 0x80000000 /* Auto-neg enable */
+
+/* Receive Configuration Word */
+#define E1000_RXCW_CW 0x0000ffff /* RxConfigWord mask */
+#define E1000_RXCW_IV 0x08000000 /* Receive config invalid */
+#define E1000_RXCW_C 0x20000000 /* Receive config */
+#define E1000_RXCW_SYNCH 0x40000000 /* Receive config synch */
+
+#define E1000_TSYNCTXCTL_VALID 0x00000001 /* Tx timestamp valid */
+#define E1000_TSYNCTXCTL_ENABLED 0x00000010 /* enable Tx timestamping */
+
+#define E1000_TSYNCRXCTL_VALID 0x00000001 /* Rx timestamp valid */
+#define E1000_TSYNCRXCTL_TYPE_MASK 0x0000000E /* Rx type mask */
+#define E1000_TSYNCRXCTL_TYPE_L2_V2 0x00
+#define E1000_TSYNCRXCTL_TYPE_L4_V1 0x02
+#define E1000_TSYNCRXCTL_TYPE_L2_L4_V2 0x04
+#define E1000_TSYNCRXCTL_TYPE_ALL 0x08
+#define E1000_TSYNCRXCTL_TYPE_EVENT_V2 0x0A
+#define E1000_TSYNCRXCTL_ENABLED 0x00000010 /* enable Rx timestamping */
+#define E1000_TSYNCRXCTL_SYSCFI 0x00000020 /* Sys clock frequency */
+
+#define E1000_TSYNCRXCFG_PTP_V1_CTRLT_MASK 0x000000FF
+#define E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE 0x00
+#define E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE 0x01
+#define E1000_TSYNCRXCFG_PTP_V1_FOLLOWUP_MESSAGE 0x02
+#define E1000_TSYNCRXCFG_PTP_V1_DELAY_RESP_MESSAGE 0x03
+#define E1000_TSYNCRXCFG_PTP_V1_MANAGEMENT_MESSAGE 0x04
+
+#define E1000_TSYNCRXCFG_PTP_V2_MSGID_MASK 0x00000F00
+#define E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE 0x0000
+#define E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE 0x0100
+#define E1000_TSYNCRXCFG_PTP_V2_PATH_DELAY_REQ_MESSAGE 0x0200
+#define E1000_TSYNCRXCFG_PTP_V2_PATH_DELAY_RESP_MESSAGE 0x0300
+#define E1000_TSYNCRXCFG_PTP_V2_FOLLOWUP_MESSAGE 0x0800
+#define E1000_TSYNCRXCFG_PTP_V2_DELAY_RESP_MESSAGE 0x0900
+#define E1000_TSYNCRXCFG_PTP_V2_PATH_DELAY_FOLLOWUP_MESSAGE 0x0A00
+#define E1000_TSYNCRXCFG_PTP_V2_ANNOUNCE_MESSAGE 0x0B00
+#define E1000_TSYNCRXCFG_PTP_V2_SIGNALLING_MESSAGE 0x0C00
+#define E1000_TSYNCRXCFG_PTP_V2_MANAGEMENT_MESSAGE 0x0D00
+
+#define E1000_TIMINCA_16NS_SHIFT 24
+#define E1000_TIMINCA_INCPERIOD_SHIFT 24
+#define E1000_TIMINCA_INCVALUE_MASK 0x00FFFFFF
+
+#define E1000_TSICR_TXTS 0x00000002
+#define E1000_TSIM_TXTS 0x00000002
+/* TUPLE Filtering Configuration */
+#define E1000_TTQF_DISABLE_MASK 0xF0008000 /* TTQF Disable Mask */
+#define E1000_TTQF_QUEUE_ENABLE 0x100 /* TTQF Queue Enable Bit */
+#define E1000_TTQF_PROTOCOL_MASK 0xFF /* TTQF Protocol Mask */
+/* TTQF TCP Bit, shift with E1000_TTQF_PROTOCOL SHIFT */
+#define E1000_TTQF_PROTOCOL_TCP 0x0
+/* TTQF UDP Bit, shift with E1000_TTQF_PROTOCOL_SHIFT */
+#define E1000_TTQF_PROTOCOL_UDP 0x1
+/* TTQF SCTP Bit, shift with E1000_TTQF_PROTOCOL_SHIFT */
+#define E1000_TTQF_PROTOCOL_SCTP 0x2
+#define E1000_TTQF_PROTOCOL_SHIFT 5 /* TTQF Protocol Shift */
+#define E1000_TTQF_QUEUE_SHIFT 16 /* TTQF Queue Shfit */
+#define E1000_TTQF_RX_QUEUE_MASK 0x70000 /* TTQF Queue Mask */
+#define E1000_TTQF_MASK_ENABLE 0x10000000 /* TTQF Mask Enable Bit */
+#define E1000_IMIR_CLEAR_MASK 0xF001FFFF /* IMIR Reg Clear Mask */
+#define E1000_IMIR_PORT_BYPASS 0x20000 /* IMIR Port Bypass Bit */
+#define E1000_IMIR_PRIORITY_SHIFT 29 /* IMIR Priority Shift */
+#define E1000_IMIREXT_CLEAR_MASK 0x7FFFF /* IMIREXT Reg Clear Mask */
+
+#define E1000_MDICNFG_EXT_MDIO 0x80000000 /* MDI ext/int destination */
+#define E1000_MDICNFG_COM_MDIO 0x40000000 /* MDI shared w/ lan 0 */
+#define E1000_MDICNFG_PHY_MASK 0x03E00000
+#define E1000_MDICNFG_PHY_SHIFT 21
+
+#define E1000_MEDIA_PORT_COPPER 1
+#define E1000_MEDIA_PORT_OTHER 2
+#define E1000_M88E1112_AUTO_COPPER_SGMII 0x2
+#define E1000_M88E1112_AUTO_COPPER_BASEX 0x3
+#define E1000_M88E1112_STATUS_LINK 0x0004 /* Interface Link Bit */
+#define E1000_M88E1112_MAC_CTRL_1 0x10
+#define E1000_M88E1112_MAC_CTRL_1_MODE_MASK 0x0380 /* Mode Select */
+#define E1000_M88E1112_MAC_CTRL_1_MODE_SHIFT 7
+#define E1000_M88E1112_PAGE_ADDR 0x16
+#define E1000_M88E1112_STATUS 0x01
+
+#define E1000_THSTAT_LOW_EVENT 0x20000000 /* Low thermal threshold */
+#define E1000_THSTAT_MID_EVENT 0x00200000 /* Mid thermal threshold */
+#define E1000_THSTAT_HIGH_EVENT 0x00002000 /* High thermal threshold */
+#define E1000_THSTAT_PWR_DOWN 0x00000001 /* Power Down Event */
+#define E1000_THSTAT_LINK_THROTTLE 0x00000002 /* Link Spd Throttle Event */
+
+/* I350 EEE defines */
+#define E1000_IPCNFG_EEE_1G_AN 0x00000008 /* IPCNFG EEE Ena 1G AN */
+#define E1000_IPCNFG_EEE_100M_AN 0x00000004 /* IPCNFG EEE Ena 100M AN */
+#define E1000_EEER_TX_LPI_EN 0x00010000 /* EEER Tx LPI Enable */
+#define E1000_EEER_RX_LPI_EN 0x00020000 /* EEER Rx LPI Enable */
+#define E1000_EEER_LPI_FC 0x00040000 /* EEER Ena on Flow Cntrl */
+/* EEE status */
+#define E1000_EEER_EEE_NEG 0x20000000 /* EEE capability nego */
+#define E1000_EEER_RX_LPI_STATUS 0x40000000 /* Rx in LPI state */
+#define E1000_EEER_TX_LPI_STATUS 0x80000000 /* Tx in LPI state */
+#define E1000_EEE_LP_ADV_ADDR_I350 0x040F /* EEE LP Advertisement */
+#define E1000_M88E1543_PAGE_ADDR 0x16 /* Page Offset Register */
+#define E1000_M88E1543_EEE_CTRL_1 0x0
+#define E1000_M88E1543_EEE_CTRL_1_MS 0x0001 /* EEE Master/Slave */
+#define E1000_EEE_ADV_DEV_I354 7
+#define E1000_EEE_ADV_ADDR_I354 60
+#define E1000_EEE_ADV_100_SUPPORTED (1 << 1) /* 100BaseTx EEE Supported */
+#define E1000_EEE_ADV_1000_SUPPORTED (1 << 2) /* 1000BaseT EEE Supported */
+#define E1000_PCS_STATUS_DEV_I354 3
+#define E1000_PCS_STATUS_ADDR_I354 1
+#define E1000_PCS_STATUS_RX_LPI_RCVD 0x0400
+#define E1000_PCS_STATUS_TX_LPI_RCVD 0x0800
+#define E1000_EEE_SU_LPI_CLK_STP 0x00800000 /* EEE LPI Clock Stop */
+#define E1000_EEE_LP_ADV_DEV_I210 7 /* EEE LP Adv Device */
+#define E1000_EEE_LP_ADV_ADDR_I210 61 /* EEE LP Adv Register */
+/* PCI Express Control */
+#define E1000_GCR_RXD_NO_SNOOP 0x00000001
+#define E1000_GCR_RXDSCW_NO_SNOOP 0x00000002
+#define E1000_GCR_RXDSCR_NO_SNOOP 0x00000004
+#define E1000_GCR_TXD_NO_SNOOP 0x00000008
+#define E1000_GCR_TXDSCW_NO_SNOOP 0x00000010
+#define E1000_GCR_TXDSCR_NO_SNOOP 0x00000020
+#define E1000_GCR_CMPL_TMOUT_MASK 0x0000F000
+#define E1000_GCR_CMPL_TMOUT_10ms 0x00001000
+#define E1000_GCR_CMPL_TMOUT_RESEND 0x00010000
+#define E1000_GCR_CAP_VER2 0x00040000
+
+#define PCIE_NO_SNOOP_ALL (E1000_GCR_RXD_NO_SNOOP | \
+ E1000_GCR_RXDSCW_NO_SNOOP | \
+ E1000_GCR_RXDSCR_NO_SNOOP | \
+ E1000_GCR_TXD_NO_SNOOP | \
+ E1000_GCR_TXDSCW_NO_SNOOP | \
+ E1000_GCR_TXDSCR_NO_SNOOP)
+
+#define E1000_MMDAC_FUNC_DATA 0x4000 /* Data, no post increment */
+
+/* mPHY address control and data registers */
+#define E1000_MPHY_ADDR_CTL 0x0024 /* Address Control Reg */
+#define E1000_MPHY_ADDR_CTL_OFFSET_MASK 0xFFFF0000
+#define E1000_MPHY_DATA 0x0E10 /* Data Register */
+
+/* AFE CSR Offset for PCS CLK */
+#define E1000_MPHY_PCS_CLK_REG_OFFSET 0x0004
+/* Override for near end digital loopback. */
+#define E1000_MPHY_PCS_CLK_REG_DIGINELBEN 0x10
+
+/* PHY Control Register */
+#define MII_CR_SPEED_SELECT_MSB 0x0040 /* bits 6,13: 10=1000, 01=100, 00=10 */
+#define MII_CR_COLL_TEST_ENABLE 0x0080 /* Collision test enable */
+#define MII_CR_FULL_DUPLEX 0x0100 /* FDX =1, half duplex =0 */
+#define MII_CR_RESTART_AUTO_NEG 0x0200 /* Restart auto negotiation */
+#define MII_CR_ISOLATE 0x0400 /* Isolate PHY from MII */
+#define MII_CR_POWER_DOWN 0x0800 /* Power down */
+#define MII_CR_AUTO_NEG_EN 0x1000 /* Auto Neg Enable */
+#define MII_CR_SPEED_SELECT_LSB 0x2000 /* bits 6,13: 10=1000, 01=100, 00=10 */
+#define MII_CR_LOOPBACK 0x4000 /* 0 = normal, 1 = loopback */
+#define MII_CR_RESET 0x8000 /* 0 = normal, 1 = PHY reset */
+#define MII_CR_SPEED_1000 0x0040
+#define MII_CR_SPEED_100 0x2000
+#define MII_CR_SPEED_10 0x0000
+
+/* PHY Status Register */
+#define MII_SR_EXTENDED_CAPS 0x0001 /* Extended register capabilities */
+#define MII_SR_JABBER_DETECT 0x0002 /* Jabber Detected */
+#define MII_SR_LINK_STATUS 0x0004 /* Link Status 1 = link */
+#define MII_SR_AUTONEG_CAPS 0x0008 /* Auto Neg Capable */
+#define MII_SR_REMOTE_FAULT 0x0010 /* Remote Fault Detect */
+#define MII_SR_AUTONEG_COMPLETE 0x0020 /* Auto Neg Complete */
+#define MII_SR_PREAMBLE_SUPPRESS 0x0040 /* Preamble may be suppressed */
+#define MII_SR_EXTENDED_STATUS 0x0100 /* Ext. status info in Reg 0x0F */
+#define MII_SR_100T2_HD_CAPS 0x0200 /* 100T2 Half Duplex Capable */
+#define MII_SR_100T2_FD_CAPS 0x0400 /* 100T2 Full Duplex Capable */
+#define MII_SR_10T_HD_CAPS 0x0800 /* 10T Half Duplex Capable */
+#define MII_SR_10T_FD_CAPS 0x1000 /* 10T Full Duplex Capable */
+#define MII_SR_100X_HD_CAPS 0x2000 /* 100X Half Duplex Capable */
+#define MII_SR_100X_FD_CAPS 0x4000 /* 100X Full Duplex Capable */
+#define MII_SR_100T4_CAPS 0x8000 /* 100T4 Capable */
+
+/* Autoneg Advertisement Register */
+#define NWAY_AR_SELECTOR_FIELD 0x0001 /* indicates IEEE 802.3 CSMA/CD */
+#define NWAY_AR_10T_HD_CAPS 0x0020 /* 10T Half Duplex Capable */
+#define NWAY_AR_10T_FD_CAPS 0x0040 /* 10T Full Duplex Capable */
+#define NWAY_AR_100TX_HD_CAPS 0x0080 /* 100TX Half Duplex Capable */
+#define NWAY_AR_100TX_FD_CAPS 0x0100 /* 100TX Full Duplex Capable */
+#define NWAY_AR_100T4_CAPS 0x0200 /* 100T4 Capable */
+#define NWAY_AR_PAUSE 0x0400 /* Pause operation desired */
+#define NWAY_AR_ASM_DIR 0x0800 /* Asymmetric Pause Direction bit */
+#define NWAY_AR_REMOTE_FAULT 0x2000 /* Remote Fault detected */
+#define NWAY_AR_NEXT_PAGE 0x8000 /* Next Page ability supported */
+
+/* Link Partner Ability Register (Base Page) */
+#define NWAY_LPAR_SELECTOR_FIELD 0x0000 /* LP protocol selector field */
+#define NWAY_LPAR_10T_HD_CAPS 0x0020 /* LP 10T Half Dplx Capable */
+#define NWAY_LPAR_10T_FD_CAPS 0x0040 /* LP 10T Full Dplx Capable */
+#define NWAY_LPAR_100TX_HD_CAPS 0x0080 /* LP 100TX Half Dplx Capable */
+#define NWAY_LPAR_100TX_FD_CAPS 0x0100 /* LP 100TX Full Dplx Capable */
+#define NWAY_LPAR_100T4_CAPS 0x0200 /* LP is 100T4 Capable */
+#define NWAY_LPAR_PAUSE 0x0400 /* LP Pause operation desired */
+#define NWAY_LPAR_ASM_DIR 0x0800 /* LP Asym Pause Direction bit */
+#define NWAY_LPAR_REMOTE_FAULT 0x2000 /* LP detected Remote Fault */
+#define NWAY_LPAR_ACKNOWLEDGE 0x4000 /* LP rx'd link code word */
+#define NWAY_LPAR_NEXT_PAGE 0x8000 /* Next Page ability supported */
+
+/* Autoneg Expansion Register */
+#define NWAY_ER_LP_NWAY_CAPS 0x0001 /* LP has Auto Neg Capability */
+#define NWAY_ER_PAGE_RXD 0x0002 /* LP 10T Half Dplx Capable */
+#define NWAY_ER_NEXT_PAGE_CAPS 0x0004 /* LP 10T Full Dplx Capable */
+#define NWAY_ER_LP_NEXT_PAGE_CAPS 0x0008 /* LP 100TX Half Dplx Capable */
+#define NWAY_ER_PAR_DETECT_FAULT 0x0010 /* LP 100TX Full Dplx Capable */
+
+/* 1000BASE-T Control Register */
+#define CR_1000T_ASYM_PAUSE 0x0080 /* Advertise asymmetric pause bit */
+#define CR_1000T_HD_CAPS 0x0100 /* Advertise 1000T HD capability */
+#define CR_1000T_FD_CAPS 0x0200 /* Advertise 1000T FD capability */
+/* 1=Repeater/switch device port 0=DTE device */
+#define CR_1000T_REPEATER_DTE 0x0400
+/* 1=Configure PHY as Master 0=Configure PHY as Slave */
+#define CR_1000T_MS_VALUE 0x0800
+/* 1=Master/Slave manual config value 0=Automatic Master/Slave config */
+#define CR_1000T_MS_ENABLE 0x1000
+#define CR_1000T_TEST_MODE_NORMAL 0x0000 /* Normal Operation */
+#define CR_1000T_TEST_MODE_1 0x2000 /* Transmit Waveform test */
+#define CR_1000T_TEST_MODE_2 0x4000 /* Master Transmit Jitter test */
+#define CR_1000T_TEST_MODE_3 0x6000 /* Slave Transmit Jitter test */
+#define CR_1000T_TEST_MODE_4 0x8000 /* Transmitter Distortion test */
+
+/* 1000BASE-T Status Register */
+#define SR_1000T_IDLE_ERROR_CNT 0x00FF /* Num idle err since last rd */
+#define SR_1000T_ASYM_PAUSE_DIR 0x0100 /* LP asym pause direction bit */
+#define SR_1000T_LP_HD_CAPS 0x0400 /* LP is 1000T HD capable */
+#define SR_1000T_LP_FD_CAPS 0x0800 /* LP is 1000T FD capable */
+#define SR_1000T_REMOTE_RX_STATUS 0x1000 /* Remote receiver OK */
+#define SR_1000T_LOCAL_RX_STATUS 0x2000 /* Local receiver OK */
+#define SR_1000T_MS_CONFIG_RES 0x4000 /* 1=Local Tx Master, 0=Slave */
+#define SR_1000T_MS_CONFIG_FAULT 0x8000 /* Master/Slave config fault */
+
+#define SR_1000T_PHY_EXCESSIVE_IDLE_ERR_COUNT 5
+
+/* PHY 1000 MII Register/Bit Definitions */
+/* PHY Registers defined by IEEE */
+#define PHY_CONTROL 0x00 /* Control Register */
+#define PHY_STATUS 0x01 /* Status Register */
+#define PHY_ID1 0x02 /* Phy Id Reg (word 1) */
+#define PHY_ID2 0x03 /* Phy Id Reg (word 2) */
+#define PHY_AUTONEG_ADV 0x04 /* Autoneg Advertisement */
+#define PHY_LP_ABILITY 0x05 /* Link Partner Ability (Base Page) */
+#define PHY_AUTONEG_EXP 0x06 /* Autoneg Expansion Reg */
+#define PHY_NEXT_PAGE_TX 0x07 /* Next Page Tx */
+#define PHY_LP_NEXT_PAGE 0x08 /* Link Partner Next Page */
+#define PHY_1000T_CTRL 0x09 /* 1000Base-T Control Reg */
+#define PHY_1000T_STATUS 0x0A /* 1000Base-T Status Reg */
+#define PHY_EXT_STATUS 0x0F /* Extended Status Reg */
+
+#define PHY_CONTROL_LB 0x4000 /* PHY Loopback bit */
+
+/* NVM Control */
+#define E1000_EECD_SK 0x00000001 /* NVM Clock */
+#define E1000_EECD_CS 0x00000002 /* NVM Chip Select */
+#define E1000_EECD_DI 0x00000004 /* NVM Data In */
+#define E1000_EECD_DO 0x00000008 /* NVM Data Out */
+#define E1000_EECD_REQ 0x00000040 /* NVM Access Request */
+#define E1000_EECD_GNT 0x00000080 /* NVM Access Grant */
+#define E1000_EECD_PRES 0x00000100 /* NVM Present */
+#define E1000_EECD_SIZE 0x00000200 /* NVM Size (0=64 word 1=256 word) */
+#define E1000_EECD_BLOCKED 0x00008000 /* Bit banging access blocked flag */
+#define E1000_EECD_ABORT 0x00010000 /* NVM operation aborted flag */
+#define E1000_EECD_TIMEOUT 0x00020000 /* NVM read operation timeout flag */
+#define E1000_EECD_ERROR_CLR 0x00040000 /* NVM error status clear bit */
+/* NVM Addressing bits based on type 0=small, 1=large */
+#define E1000_EECD_ADDR_BITS 0x00000400
+#define E1000_NVM_GRANT_ATTEMPTS 1000 /* NVM # attempts to gain grant */
+#define E1000_EECD_AUTO_RD 0x00000200 /* NVM Auto Read done */
+#define E1000_EECD_SIZE_EX_MASK 0x00007800 /* NVM Size */
+#define E1000_EECD_SIZE_EX_SHIFT 11
+#define E1000_EECD_FLUPD 0x00080000 /* Update FLASH */
+#define E1000_EECD_AUPDEN 0x00100000 /* Ena Auto FLASH update */
+#define E1000_EECD_SEC1VAL 0x00400000 /* Sector One Valid */
+#define E1000_EECD_SEC1VAL_VALID_MASK (E1000_EECD_AUTO_RD | E1000_EECD_PRES)
+#define E1000_EECD_FLUPD_I210 0x00800000 /* Update FLASH */
+#define E1000_EECD_FLUDONE_I210 0x04000000 /* Update FLASH done */
+#define E1000_EECD_FLASH_DETECTED_I210 0x00080000 /* FLASH detected */
+#define E1000_EECD_SEC1VAL_I210 0x02000000 /* Sector One Valid */
+#define E1000_FLUDONE_ATTEMPTS 20000
+#define E1000_EERD_EEWR_MAX_COUNT 512 /* buffered EEPROM words rw */
+#define E1000_I210_FIFO_SEL_RX 0x00
+#define E1000_I210_FIFO_SEL_TX_QAV(_i) (0x02 + (_i))
+#define E1000_I210_FIFO_SEL_TX_LEGACY E1000_I210_FIFO_SEL_TX_QAV(0)
+#define E1000_I210_FIFO_SEL_BMC2OS_TX 0x06
+#define E1000_I210_FIFO_SEL_BMC2OS_RX 0x01
+
+#define E1000_I210_FLASH_SECTOR_SIZE 0x1000 /* 4KB FLASH sector unit size */
+/* Secure FLASH mode requires removing MSb */
+#define E1000_I210_FW_PTR_MASK 0x7FFF
+/* Firmware code revision field word offset*/
+#define E1000_I210_FW_VER_OFFSET 328
+
+#define E1000_NVM_RW_REG_DATA 16 /* Offset to data in NVM read/write regs */
+#define E1000_NVM_RW_REG_DONE 2 /* Offset to READ/WRITE done bit */
+#define E1000_NVM_RW_REG_START 1 /* Start operation */
+#define E1000_NVM_RW_ADDR_SHIFT 2 /* Shift to the address bits */
+#define E1000_NVM_POLL_WRITE 1 /* Flag for polling for write complete */
+#define E1000_NVM_POLL_READ 0 /* Flag for polling for read complete */
+#define E1000_FLASH_UPDATES 2000
+
+/* NVM Word Offsets */
+#define NVM_COMPAT 0x0003
+#define NVM_ID_LED_SETTINGS 0x0004
+#define NVM_VERSION 0x0005
+#define E1000_I210_NVM_FW_MODULE_PTR 0x0010
+#define E1000_I350_NVM_FW_MODULE_PTR 0x0051
+#define NVM_FUTURE_INIT_WORD1 0x0019
+#define NVM_ETRACK_WORD 0x0042
+#define NVM_ETRACK_HIWORD 0x0043
+#define NVM_COMB_VER_OFF 0x0083
+#define NVM_COMB_VER_PTR 0x003d
+
+/* NVM version defines */
+#define NVM_MAJOR_MASK 0xF000
+#define NVM_MINOR_MASK 0x0FF0
+#define NVM_IMAGE_ID_MASK 0x000F
+#define NVM_COMB_VER_MASK 0x00FF
+#define NVM_MAJOR_SHIFT 12
+#define NVM_MINOR_SHIFT 4
+#define NVM_COMB_VER_SHFT 8
+#define NVM_VER_INVALID 0xFFFF
+#define NVM_ETRACK_SHIFT 16
+#define NVM_ETRACK_VALID 0x8000
+#define NVM_NEW_DEC_MASK 0x0F00
+#define NVM_HEX_CONV 16
+#define NVM_HEX_TENS 10
+
+/* FW version defines */
+/* Offset of "Loader patch ptr" in Firmware Header */
+#define E1000_I350_NVM_FW_LOADER_PATCH_PTR_OFFSET 0x01
+/* Patch generation hour & minutes */
+#define E1000_I350_NVM_FW_VER_WORD1_OFFSET 0x04
+/* Patch generation month & day */
+#define E1000_I350_NVM_FW_VER_WORD2_OFFSET 0x05
+/* Patch generation year */
+#define E1000_I350_NVM_FW_VER_WORD3_OFFSET 0x06
+/* Patch major & minor numbers */
+#define E1000_I350_NVM_FW_VER_WORD4_OFFSET 0x07
+
+#define NVM_MAC_ADDR 0x0000
+#define NVM_SUB_DEV_ID 0x000B
+#define NVM_SUB_VEN_ID 0x000C
+#define NVM_DEV_ID 0x000D
+#define NVM_VEN_ID 0x000E
+#define NVM_INIT_CTRL_2 0x000F
+#define NVM_INIT_CTRL_4 0x0013
+#define NVM_LED_1_CFG 0x001C
+#define NVM_LED_0_2_CFG 0x001F
+
+#define NVM_COMPAT_VALID_CSUM 0x0001
+#define NVM_FUTURE_INIT_WORD1_VALID_CSUM 0x0040
+
+#define NVM_ETS_CFG 0x003E
+#define NVM_ETS_LTHRES_DELTA_MASK 0x07C0
+#define NVM_ETS_LTHRES_DELTA_SHIFT 6
+#define NVM_ETS_TYPE_MASK 0x0038
+#define NVM_ETS_TYPE_SHIFT 3
+#define NVM_ETS_TYPE_EMC 0x000
+#define NVM_ETS_NUM_SENSORS_MASK 0x0007
+#define NVM_ETS_DATA_LOC_MASK 0x3C00
+#define NVM_ETS_DATA_LOC_SHIFT 10
+#define NVM_ETS_DATA_INDEX_MASK 0x0300
+#define NVM_ETS_DATA_INDEX_SHIFT 8
+#define NVM_ETS_DATA_HTHRESH_MASK 0x00FF
+#define NVM_INIT_CONTROL2_REG 0x000F
+#define NVM_INIT_CONTROL3_PORT_B 0x0014
+#define NVM_INIT_3GIO_3 0x001A
+#define NVM_SWDEF_PINS_CTRL_PORT_0 0x0020
+#define NVM_INIT_CONTROL3_PORT_A 0x0024
+#define NVM_CFG 0x0012
+#define NVM_ALT_MAC_ADDR_PTR 0x0037
+#define NVM_CHECKSUM_REG 0x003F
+#define NVM_COMPATIBILITY_REG_3 0x0003
+#define NVM_COMPATIBILITY_BIT_MASK 0x8000
+
+#define E1000_NVM_CFG_DONE_PORT_0 0x040000 /* MNG config cycle done */
+#define E1000_NVM_CFG_DONE_PORT_1 0x080000 /* ...for second port */
+#define E1000_NVM_CFG_DONE_PORT_2 0x100000 /* ...for third port */
+#define E1000_NVM_CFG_DONE_PORT_3 0x200000 /* ...for fourth port */
+
+#define NVM_82580_LAN_FUNC_OFFSET(a) ((a) ? (0x40 + (0x40 * (a))) : 0)
+
+/* Mask bits for fields in Word 0x24 of the NVM */
+#define NVM_WORD24_COM_MDIO 0x0008 /* MDIO interface shared */
+#define NVM_WORD24_EXT_MDIO 0x0004 /* MDIO accesses routed extrnl */
+/* Offset of Link Mode bits for 82575/82576 */
+#define NVM_WORD24_LNK_MODE_OFFSET 8
+/* Offset of Link Mode bits for 82580 up */
+#define NVM_WORD24_82580_LNK_MODE_OFFSET 4
+
+
+/* Mask bits for fields in Word 0x0f of the NVM */
+#define NVM_WORD0F_PAUSE_MASK 0x3000
+#define NVM_WORD0F_PAUSE 0x1000
+#define NVM_WORD0F_ASM_DIR 0x2000
+
+/* Mask bits for fields in Word 0x1a of the NVM */
+#define NVM_WORD1A_ASPM_MASK 0x000C
+
+/* Mask bits for fields in Word 0x03 of the EEPROM */
+#define NVM_COMPAT_LOM 0x0800
+
+/* length of string needed to store PBA number */
+#define E1000_PBANUM_LENGTH 11
+
+/* For checksumming, the sum of all words in the NVM should equal 0xBABA. */
+#define NVM_SUM 0xBABA
+
+/* PBA (printed board assembly) number words */
+#define NVM_PBA_OFFSET_0 8
+#define NVM_PBA_OFFSET_1 9
+#define NVM_PBA_PTR_GUARD 0xFAFA
+#define NVM_RESERVED_WORD 0xFFFF
+#define NVM_WORD_SIZE_BASE_SHIFT 6
+
+/* NVM Commands - SPI */
+#define NVM_MAX_RETRY_SPI 5000 /* Max wait of 5ms, for RDY signal */
+#define NVM_READ_OPCODE_SPI 0x03 /* NVM read opcode */
+#define NVM_WRITE_OPCODE_SPI 0x02 /* NVM write opcode */
+#define NVM_A8_OPCODE_SPI 0x08 /* opcode bit-3 = address bit-8 */
+#define NVM_WREN_OPCODE_SPI 0x06 /* NVM set Write Enable latch */
+#define NVM_RDSR_OPCODE_SPI 0x05 /* NVM read Status register */
+
+/* SPI NVM Status Register */
+#define NVM_STATUS_RDY_SPI 0x01
+
+/* Word definitions for ID LED Settings */
+#define ID_LED_RESERVED_0000 0x0000
+#define ID_LED_RESERVED_FFFF 0xFFFF
+#define ID_LED_DEFAULT ((ID_LED_OFF1_ON2 << 12) | \
+ (ID_LED_OFF1_OFF2 << 8) | \
+ (ID_LED_DEF1_DEF2 << 4) | \
+ (ID_LED_DEF1_DEF2))
+#define ID_LED_DEF1_DEF2 0x1
+#define ID_LED_DEF1_ON2 0x2
+#define ID_LED_DEF1_OFF2 0x3
+#define ID_LED_ON1_DEF2 0x4
+#define ID_LED_ON1_ON2 0x5
+#define ID_LED_ON1_OFF2 0x6
+#define ID_LED_OFF1_DEF2 0x7
+#define ID_LED_OFF1_ON2 0x8
+#define ID_LED_OFF1_OFF2 0x9
+
+#define IGP_ACTIVITY_LED_MASK 0xFFFFF0FF
+#define IGP_ACTIVITY_LED_ENABLE 0x0300
+#define IGP_LED3_MODE 0x07000000
+
+/* PCI/PCI-X/PCI-EX Config space */
+#define PCI_HEADER_TYPE_REGISTER 0x0E
+#define PCIE_LINK_STATUS 0x12
+#define PCIE_DEVICE_CONTROL2 0x28
+
+#define PCI_HEADER_TYPE_MULTIFUNC 0x80
+#define PCIE_LINK_WIDTH_MASK 0x3F0
+#define PCIE_LINK_WIDTH_SHIFT 4
+#define PCIE_LINK_SPEED_MASK 0x0F
+#define PCIE_LINK_SPEED_2500 0x01
+#define PCIE_LINK_SPEED_5000 0x02
+#define PCIE_DEVICE_CONTROL2_16ms 0x0005
+
+#ifndef ETH_ADDR_LEN
+#define ETH_ADDR_LEN 6
+#endif
+
+#define PHY_REVISION_MASK 0xFFFFFFF0
+#define MAX_PHY_REG_ADDRESS 0x1F /* 5 bit address bus (0-0x1F) */
+#define MAX_PHY_MULTI_PAGE_REG 0xF
+
+/* Bit definitions for valid PHY IDs.
+ * I = Integrated
+ * E = External
+ */
+#define M88E1000_E_PHY_ID 0x01410C50
+#define M88E1000_I_PHY_ID 0x01410C30
+#define M88E1011_I_PHY_ID 0x01410C20
+#define IGP01E1000_I_PHY_ID 0x02A80380
+#define M88E1111_I_PHY_ID 0x01410CC0
+#define M88E1543_E_PHY_ID 0x01410EA0
+#define M88E1112_E_PHY_ID 0x01410C90
+#define I347AT4_E_PHY_ID 0x01410DC0
+#define M88E1340M_E_PHY_ID 0x01410DF0
+#define GG82563_E_PHY_ID 0x01410CA0
+#define IGP03E1000_E_PHY_ID 0x02A80390
+#define IFE_E_PHY_ID 0x02A80330
+#define IFE_PLUS_E_PHY_ID 0x02A80320
+#define IFE_C_E_PHY_ID 0x02A80310
+#define I82580_I_PHY_ID 0x015403A0
+#define I350_I_PHY_ID 0x015403B0
+#define I210_I_PHY_ID 0x01410C00
+#define IGP04E1000_E_PHY_ID 0x02A80391
+#define M88_VENDOR 0x0141
+
+/* M88E1000 Specific Registers */
+#define M88E1000_PHY_SPEC_CTRL 0x10 /* PHY Specific Control Reg */
+#define M88E1000_PHY_SPEC_STATUS 0x11 /* PHY Specific Status Reg */
+#define M88E1000_EXT_PHY_SPEC_CTRL 0x14 /* Extended PHY Specific Cntrl */
+#define M88E1000_RX_ERR_CNTR 0x15 /* Receive Error Counter */
+
+#define M88E1000_PHY_PAGE_SELECT 0x1D /* Reg 29 for pg number setting */
+#define M88E1000_PHY_GEN_CONTROL 0x1E /* meaning depends on reg 29 */
+
+/* M88E1000 PHY Specific Control Register */
+#define M88E1000_PSCR_POLARITY_REVERSAL 0x0002 /* 1=Polarity Reverse enabled */
+/* MDI Crossover Mode bits 6:5 Manual MDI configuration */
+#define M88E1000_PSCR_MDI_MANUAL_MODE 0x0000
+#define M88E1000_PSCR_MDIX_MANUAL_MODE 0x0020 /* Manual MDIX configuration */
+/* 1000BASE-T: Auto crossover, 100BASE-TX/10BASE-T: MDI Mode */
+#define M88E1000_PSCR_AUTO_X_1000T 0x0040
+/* Auto crossover enabled all speeds */
+#define M88E1000_PSCR_AUTO_X_MODE 0x0060
+#define M88E1000_PSCR_ASSERT_CRS_ON_TX 0x0800 /* 1=Assert CRS on Tx */
+
+/* M88E1000 PHY Specific Status Register */
+#define M88E1000_PSSR_REV_POLARITY 0x0002 /* 1=Polarity reversed */
+#define M88E1000_PSSR_DOWNSHIFT 0x0020 /* 1=Downshifted */
+#define M88E1000_PSSR_MDIX 0x0040 /* 1=MDIX; 0=MDI */
+/* 0 = <50M
+ * 1 = 50-80M
+ * 2 = 80-110M
+ * 3 = 110-140M
+ * 4 = >140M
+ */
+#define M88E1000_PSSR_CABLE_LENGTH 0x0380
+#define M88E1000_PSSR_LINK 0x0400 /* 1=Link up, 0=Link down */
+#define M88E1000_PSSR_SPD_DPLX_RESOLVED 0x0800 /* 1=Speed & Duplex resolved */
+#define M88E1000_PSSR_SPEED 0xC000 /* Speed, bits 14:15 */
+#define M88E1000_PSSR_1000MBS 0x8000 /* 10=1000Mbs */
+
+#define M88E1000_PSSR_CABLE_LENGTH_SHIFT 7
+
+/* Number of times we will attempt to autonegotiate before downshifting if we
+ * are the master
+ */
+#define M88E1000_EPSCR_MASTER_DOWNSHIFT_MASK 0x0C00
+#define M88E1000_EPSCR_MASTER_DOWNSHIFT_1X 0x0000
+/* Number of times we will attempt to autonegotiate before downshifting if we
+ * are the slave
+ */
+#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_MASK 0x0300
+#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_1X 0x0100
+#define M88E1000_EPSCR_TX_CLK_25 0x0070 /* 25 MHz TX_CLK */
+
+/* Intel I347AT4 Registers */
+#define I347AT4_PCDL 0x10 /* PHY Cable Diagnostics Length */
+#define I347AT4_PCDC 0x15 /* PHY Cable Diagnostics Control */
+#define I347AT4_PAGE_SELECT 0x16
+
+/* I347AT4 Extended PHY Specific Control Register */
+
+/* Number of times we will attempt to autonegotiate before downshifting if we
+ * are the master
+ */
+#define I347AT4_PSCR_DOWNSHIFT_ENABLE 0x0800
+#define I347AT4_PSCR_DOWNSHIFT_MASK 0x7000
+#define I347AT4_PSCR_DOWNSHIFT_1X 0x0000
+#define I347AT4_PSCR_DOWNSHIFT_2X 0x1000
+#define I347AT4_PSCR_DOWNSHIFT_3X 0x2000
+#define I347AT4_PSCR_DOWNSHIFT_4X 0x3000
+#define I347AT4_PSCR_DOWNSHIFT_5X 0x4000
+#define I347AT4_PSCR_DOWNSHIFT_6X 0x5000
+#define I347AT4_PSCR_DOWNSHIFT_7X 0x6000
+#define I347AT4_PSCR_DOWNSHIFT_8X 0x7000
+
+/* I347AT4 PHY Cable Diagnostics Control */
+#define I347AT4_PCDC_CABLE_LENGTH_UNIT 0x0400 /* 0=cm 1=meters */
+
+/* M88E1112 only registers */
+#define M88E1112_VCT_DSP_DISTANCE 0x001A
+
+/* M88EC018 Rev 2 specific DownShift settings */
+#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_MASK 0x0E00
+#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_5X 0x0800
+
+/* Bits...
+ * 15-5: page
+ * 4-0: register offset
+ */
+#define GG82563_PAGE_SHIFT 5
+#define GG82563_REG(page, reg) \
+ (((page) << GG82563_PAGE_SHIFT) | ((reg) & MAX_PHY_REG_ADDRESS))
+#define GG82563_MIN_ALT_REG 30
+
+/* GG82563 Specific Registers */
+#define GG82563_PHY_SPEC_CTRL GG82563_REG(0, 16) /* PHY Spec Cntrl */
+#define GG82563_PHY_PAGE_SELECT GG82563_REG(0, 22) /* Page Select */
+#define GG82563_PHY_SPEC_CTRL_2 GG82563_REG(0, 26) /* PHY Spec Cntrl2 */
+#define GG82563_PHY_PAGE_SELECT_ALT GG82563_REG(0, 29) /* Alt Page Select */
+
+/* MAC Specific Control Register */
+#define GG82563_PHY_MAC_SPEC_CTRL GG82563_REG(2, 21)
+
+#define GG82563_PHY_DSP_DISTANCE GG82563_REG(5, 26) /* DSP Distance */
+
+/* Page 193 - Port Control Registers */
+/* Kumeran Mode Control */
+#define GG82563_PHY_KMRN_MODE_CTRL GG82563_REG(193, 16)
+#define GG82563_PHY_PWR_MGMT_CTRL GG82563_REG(193, 20) /* Pwr Mgt Ctrl */
+
+/* Page 194 - KMRN Registers */
+#define GG82563_PHY_INBAND_CTRL GG82563_REG(194, 18) /* Inband Ctrl */
+
+/* MDI Control */
+#define E1000_MDIC_REG_MASK 0x001F0000
+#define E1000_MDIC_REG_SHIFT 16
+#define E1000_MDIC_PHY_MASK 0x03E00000
+#define E1000_MDIC_PHY_SHIFT 21
+#define E1000_MDIC_OP_WRITE 0x04000000
+#define E1000_MDIC_OP_READ 0x08000000
+#define E1000_MDIC_READY 0x10000000
+#define E1000_MDIC_ERROR 0x40000000
+#define E1000_MDIC_DEST 0x80000000
+
+/* SerDes Control */
+#define E1000_GEN_CTL_READY 0x80000000
+#define E1000_GEN_CTL_ADDRESS_SHIFT 8
+#define E1000_GEN_POLL_TIMEOUT 640
+
+/* LinkSec register fields */
+#define E1000_LSECTXCAP_SUM_MASK 0x00FF0000
+#define E1000_LSECTXCAP_SUM_SHIFT 16
+#define E1000_LSECRXCAP_SUM_MASK 0x00FF0000
+#define E1000_LSECRXCAP_SUM_SHIFT 16
+
+#define E1000_LSECTXCTRL_EN_MASK 0x00000003
+#define E1000_LSECTXCTRL_DISABLE 0x0
+#define E1000_LSECTXCTRL_AUTH 0x1
+#define E1000_LSECTXCTRL_AUTH_ENCRYPT 0x2
+#define E1000_LSECTXCTRL_AISCI 0x00000020
+#define E1000_LSECTXCTRL_PNTHRSH_MASK 0xFFFFFF00
+#define E1000_LSECTXCTRL_RSV_MASK 0x000000D8
+
+#define E1000_LSECRXCTRL_EN_MASK 0x0000000C
+#define E1000_LSECRXCTRL_EN_SHIFT 2
+#define E1000_LSECRXCTRL_DISABLE 0x0
+#define E1000_LSECRXCTRL_CHECK 0x1
+#define E1000_LSECRXCTRL_STRICT 0x2
+#define E1000_LSECRXCTRL_DROP 0x3
+#define E1000_LSECRXCTRL_PLSH 0x00000040
+#define E1000_LSECRXCTRL_RP 0x00000080
+#define E1000_LSECRXCTRL_RSV_MASK 0xFFFFFF33
+
+/* Tx Rate-Scheduler Config fields */
+#define E1000_RTTBCNRC_RS_ENA 0x80000000
+#define E1000_RTTBCNRC_RF_DEC_MASK 0x00003FFF
+#define E1000_RTTBCNRC_RF_INT_SHIFT 14
+#define E1000_RTTBCNRC_RF_INT_MASK \
+ (E1000_RTTBCNRC_RF_DEC_MASK << E1000_RTTBCNRC_RF_INT_SHIFT)
+
+/* DMA Coalescing register fields */
+/* DMA Coalescing Watchdog Timer */
+#define E1000_DMACR_DMACWT_MASK 0x00003FFF
+/* DMA Coalescing Rx Threshold */
+#define E1000_DMACR_DMACTHR_MASK 0x00FF0000
+#define E1000_DMACR_DMACTHR_SHIFT 16
+/* Lx when no PCIe transactions */
+#define E1000_DMACR_DMAC_LX_MASK 0x30000000
+#define E1000_DMACR_DMAC_LX_SHIFT 28
+#define E1000_DMACR_DMAC_EN 0x80000000 /* Enable DMA Coalescing */
+/* DMA Coalescing BMC-to-OS Watchdog Enable */
+#define E1000_DMACR_DC_BMC2OSW_EN 0x00008000
+
+/* DMA Coalescing Transmit Threshold */
+#define E1000_DMCTXTH_DMCTTHR_MASK 0x00000FFF
+
+#define E1000_DMCTLX_TTLX_MASK 0x00000FFF /* Time to LX request */
+
+/* Rx Traffic Rate Threshold */
+#define E1000_DMCRTRH_UTRESH_MASK 0x0007FFFF
+/* Rx packet rate in current window */
+#define E1000_DMCRTRH_LRPRCW 0x80000000
+
+/* DMA Coal Rx Traffic Current Count */
+#define E1000_DMCCNT_CCOUNT_MASK 0x01FFFFFF
+
+/* Flow ctrl Rx Threshold High val */
+#define E1000_FCRTC_RTH_COAL_MASK 0x0003FFF0
+#define E1000_FCRTC_RTH_COAL_SHIFT 4
+/* Lx power decision based on DMA coal */
+#define E1000_PCIEMISC_LX_DECISION 0x00000080
+
+#define E1000_RXPBS_CFG_TS_EN 0x80000000 /* Timestamp in Rx buffer */
+#define E1000_RXPBS_SIZE_I210_MASK 0x0000003F /* Rx packet buffer size */
+#define E1000_TXPB0S_SIZE_I210_MASK 0x0000003F /* Tx packet buffer 0 size */
+
+/* Proxy Filter Control */
+#define E1000_PROXYFC_D0 0x00000001 /* Enable offload in D0 */
+#define E1000_PROXYFC_EX 0x00000004 /* Directed exact proxy */
+#define E1000_PROXYFC_MC 0x00000008 /* Directed MC Proxy */
+#define E1000_PROXYFC_BC 0x00000010 /* Broadcast Proxy Enable */
+#define E1000_PROXYFC_ARP_DIRECTED 0x00000020 /* Directed ARP Proxy Ena */
+#define E1000_PROXYFC_IPV4 0x00000040 /* Directed IPv4 Enable */
+#define E1000_PROXYFC_IPV6 0x00000080 /* Directed IPv6 Enable */
+#define E1000_PROXYFC_NS 0x00000200 /* IPv6 Neighbor Solicitation */
+#define E1000_PROXYFC_ARP 0x00000800 /* ARP Request Proxy Ena */
+/* Proxy Status */
+#define E1000_PROXYS_CLEAR 0xFFFFFFFF /* Clear */
+
+/* Firmware Status */
+#define E1000_FWSTS_FWRI 0x80000000 /* FW Reset Indication */
+/* VF Control */
+#define E1000_VTCTRL_RST 0x04000000 /* Reset VF */
+
+#define E1000_STATUS_LAN_ID_MASK 0x00000000C /* Mask for Lan ID field */
+/* Lan ID bit field offset in status register */
+#define E1000_STATUS_LAN_ID_OFFSET 2
+#define E1000_VFTA_ENTRIES 128
+#ifndef E1000_UNUSEDARG
+#define E1000_UNUSEDARG
+#endif /* E1000_UNUSEDARG */
+#endif /* _E1000_DEFINES_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_hw.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_hw.h
new file mode 100644
index 00000000..347cef71
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_hw.h
@@ -0,0 +1,793 @@
+/*******************************************************************************
+
+ Intel(R) Gigabit Ethernet Linux driver
+ Copyright(c) 2007-2013 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _E1000_HW_H_
+#define _E1000_HW_H_
+
+#include "e1000_osdep.h"
+#include "e1000_regs.h"
+#include "e1000_defines.h"
+
+struct e1000_hw;
+
+#define E1000_DEV_ID_82576 0x10C9
+#define E1000_DEV_ID_82576_FIBER 0x10E6
+#define E1000_DEV_ID_82576_SERDES 0x10E7
+#define E1000_DEV_ID_82576_QUAD_COPPER 0x10E8
+#define E1000_DEV_ID_82576_QUAD_COPPER_ET2 0x1526
+#define E1000_DEV_ID_82576_NS 0x150A
+#define E1000_DEV_ID_82576_NS_SERDES 0x1518
+#define E1000_DEV_ID_82576_SERDES_QUAD 0x150D
+#define E1000_DEV_ID_82575EB_COPPER 0x10A7
+#define E1000_DEV_ID_82575EB_FIBER_SERDES 0x10A9
+#define E1000_DEV_ID_82575GB_QUAD_COPPER 0x10D6
+#define E1000_DEV_ID_82580_COPPER 0x150E
+#define E1000_DEV_ID_82580_FIBER 0x150F
+#define E1000_DEV_ID_82580_SERDES 0x1510
+#define E1000_DEV_ID_82580_SGMII 0x1511
+#define E1000_DEV_ID_82580_COPPER_DUAL 0x1516
+#define E1000_DEV_ID_82580_QUAD_FIBER 0x1527
+#define E1000_DEV_ID_I350_COPPER 0x1521
+#define E1000_DEV_ID_I350_FIBER 0x1522
+#define E1000_DEV_ID_I350_SERDES 0x1523
+#define E1000_DEV_ID_I350_SGMII 0x1524
+#define E1000_DEV_ID_I350_DA4 0x1546
+#define E1000_DEV_ID_I210_COPPER 0x1533
+#define E1000_DEV_ID_I210_COPPER_OEM1 0x1534
+#define E1000_DEV_ID_I210_COPPER_IT 0x1535
+#define E1000_DEV_ID_I210_FIBER 0x1536
+#define E1000_DEV_ID_I210_SERDES 0x1537
+#define E1000_DEV_ID_I210_SGMII 0x1538
+#define E1000_DEV_ID_I210_COPPER_FLASHLESS 0x157B
+#define E1000_DEV_ID_I210_SERDES_FLASHLESS 0x157C
+#define E1000_DEV_ID_I211_COPPER 0x1539
+#define E1000_DEV_ID_I354_BACKPLANE_1GBPS 0x1F40
+#define E1000_DEV_ID_I354_SGMII 0x1F41
+#define E1000_DEV_ID_I354_BACKPLANE_2_5GBPS 0x1F45
+#define E1000_DEV_ID_DH89XXCC_SGMII 0x0438
+#define E1000_DEV_ID_DH89XXCC_SERDES 0x043A
+#define E1000_DEV_ID_DH89XXCC_BACKPLANE 0x043C
+#define E1000_DEV_ID_DH89XXCC_SFP 0x0440
+
+#define E1000_REVISION_0 0
+#define E1000_REVISION_1 1
+#define E1000_REVISION_2 2
+#define E1000_REVISION_3 3
+#define E1000_REVISION_4 4
+
+#define E1000_FUNC_0 0
+#define E1000_FUNC_1 1
+#define E1000_FUNC_2 2
+#define E1000_FUNC_3 3
+
+#define E1000_ALT_MAC_ADDRESS_OFFSET_LAN0 0
+#define E1000_ALT_MAC_ADDRESS_OFFSET_LAN1 3
+#define E1000_ALT_MAC_ADDRESS_OFFSET_LAN2 6
+#define E1000_ALT_MAC_ADDRESS_OFFSET_LAN3 9
+
+enum e1000_mac_type {
+ e1000_undefined = 0,
+ e1000_82575,
+ e1000_82576,
+ e1000_82580,
+ e1000_i350,
+ e1000_i354,
+ e1000_i210,
+ e1000_i211,
+ e1000_num_macs /* List is 1-based, so subtract 1 for true count. */
+};
+
+enum e1000_media_type {
+ e1000_media_type_unknown = 0,
+ e1000_media_type_copper = 1,
+ e1000_media_type_fiber = 2,
+ e1000_media_type_internal_serdes = 3,
+ e1000_num_media_types
+};
+
+enum e1000_nvm_type {
+ e1000_nvm_unknown = 0,
+ e1000_nvm_none,
+ e1000_nvm_eeprom_spi,
+ e1000_nvm_flash_hw,
+ e1000_nvm_invm,
+ e1000_nvm_flash_sw
+};
+
+enum e1000_nvm_override {
+ e1000_nvm_override_none = 0,
+ e1000_nvm_override_spi_small,
+ e1000_nvm_override_spi_large,
+};
+
+enum e1000_phy_type {
+ e1000_phy_unknown = 0,
+ e1000_phy_none,
+ e1000_phy_m88,
+ e1000_phy_igp,
+ e1000_phy_igp_2,
+ e1000_phy_gg82563,
+ e1000_phy_igp_3,
+ e1000_phy_ife,
+ e1000_phy_82580,
+ e1000_phy_vf,
+ e1000_phy_i210,
+};
+
+enum e1000_bus_type {
+ e1000_bus_type_unknown = 0,
+ e1000_bus_type_pci,
+ e1000_bus_type_pcix,
+ e1000_bus_type_pci_express,
+ e1000_bus_type_reserved
+};
+
+enum e1000_bus_speed {
+ e1000_bus_speed_unknown = 0,
+ e1000_bus_speed_33,
+ e1000_bus_speed_66,
+ e1000_bus_speed_100,
+ e1000_bus_speed_120,
+ e1000_bus_speed_133,
+ e1000_bus_speed_2500,
+ e1000_bus_speed_5000,
+ e1000_bus_speed_reserved
+};
+
+enum e1000_bus_width {
+ e1000_bus_width_unknown = 0,
+ e1000_bus_width_pcie_x1,
+ e1000_bus_width_pcie_x2,
+ e1000_bus_width_pcie_x4 = 4,
+ e1000_bus_width_pcie_x8 = 8,
+ e1000_bus_width_32,
+ e1000_bus_width_64,
+ e1000_bus_width_reserved
+};
+
+enum e1000_1000t_rx_status {
+ e1000_1000t_rx_status_not_ok = 0,
+ e1000_1000t_rx_status_ok,
+ e1000_1000t_rx_status_undefined = 0xFF
+};
+
+enum e1000_rev_polarity {
+ e1000_rev_polarity_normal = 0,
+ e1000_rev_polarity_reversed,
+ e1000_rev_polarity_undefined = 0xFF
+};
+
+enum e1000_fc_mode {
+ e1000_fc_none = 0,
+ e1000_fc_rx_pause,
+ e1000_fc_tx_pause,
+ e1000_fc_full,
+ e1000_fc_default = 0xFF
+};
+
+enum e1000_ms_type {
+ e1000_ms_hw_default = 0,
+ e1000_ms_force_master,
+ e1000_ms_force_slave,
+ e1000_ms_auto
+};
+
+enum e1000_smart_speed {
+ e1000_smart_speed_default = 0,
+ e1000_smart_speed_on,
+ e1000_smart_speed_off
+};
+
+enum e1000_serdes_link_state {
+ e1000_serdes_link_down = 0,
+ e1000_serdes_link_autoneg_progress,
+ e1000_serdes_link_autoneg_complete,
+ e1000_serdes_link_forced_up
+};
+
+#ifndef __le16
+#define __le16 u16
+#endif
+#ifndef __le32
+#define __le32 u32
+#endif
+#ifndef __le64
+#define __le64 u64
+#endif
+/* Receive Descriptor */
+struct e1000_rx_desc {
+ __le64 buffer_addr; /* Address of the descriptor's data buffer */
+ __le16 length; /* Length of data DMAed into data buffer */
+ __le16 csum; /* Packet checksum */
+ u8 status; /* Descriptor status */
+ u8 errors; /* Descriptor Errors */
+ __le16 special;
+};
+
+/* Receive Descriptor - Extended */
+union e1000_rx_desc_extended {
+ struct {
+ __le64 buffer_addr;
+ __le64 reserved;
+ } read;
+ struct {
+ struct {
+ __le32 mrq; /* Multiple Rx Queues */
+ union {
+ __le32 rss; /* RSS Hash */
+ struct {
+ __le16 ip_id; /* IP id */
+ __le16 csum; /* Packet Checksum */
+ } csum_ip;
+ } hi_dword;
+ } lower;
+ struct {
+ __le32 status_error; /* ext status/error */
+ __le16 length;
+ __le16 vlan; /* VLAN tag */
+ } upper;
+ } wb; /* writeback */
+};
+
+#define MAX_PS_BUFFERS 4
+
+/* Number of packet split data buffers (not including the header buffer) */
+#define PS_PAGE_BUFFERS (MAX_PS_BUFFERS - 1)
+
+/* Receive Descriptor - Packet Split */
+union e1000_rx_desc_packet_split {
+ struct {
+ /* one buffer for protocol header(s), three data buffers */
+ __le64 buffer_addr[MAX_PS_BUFFERS];
+ } read;
+ struct {
+ struct {
+ __le32 mrq; /* Multiple Rx Queues */
+ union {
+ __le32 rss; /* RSS Hash */
+ struct {
+ __le16 ip_id; /* IP id */
+ __le16 csum; /* Packet Checksum */
+ } csum_ip;
+ } hi_dword;
+ } lower;
+ struct {
+ __le32 status_error; /* ext status/error */
+ __le16 length0; /* length of buffer 0 */
+ __le16 vlan; /* VLAN tag */
+ } middle;
+ struct {
+ __le16 header_status;
+ /* length of buffers 1-3 */
+ __le16 length[PS_PAGE_BUFFERS];
+ } upper;
+ __le64 reserved;
+ } wb; /* writeback */
+};
+
+/* Transmit Descriptor */
+struct e1000_tx_desc {
+ __le64 buffer_addr; /* Address of the descriptor's data buffer */
+ union {
+ __le32 data;
+ struct {
+ __le16 length; /* Data buffer length */
+ u8 cso; /* Checksum offset */
+ u8 cmd; /* Descriptor control */
+ } flags;
+ } lower;
+ union {
+ __le32 data;
+ struct {
+ u8 status; /* Descriptor status */
+ u8 css; /* Checksum start */
+ __le16 special;
+ } fields;
+ } upper;
+};
+
+/* Offload Context Descriptor */
+struct e1000_context_desc {
+ union {
+ __le32 ip_config;
+ struct {
+ u8 ipcss; /* IP checksum start */
+ u8 ipcso; /* IP checksum offset */
+ __le16 ipcse; /* IP checksum end */
+ } ip_fields;
+ } lower_setup;
+ union {
+ __le32 tcp_config;
+ struct {
+ u8 tucss; /* TCP checksum start */
+ u8 tucso; /* TCP checksum offset */
+ __le16 tucse; /* TCP checksum end */
+ } tcp_fields;
+ } upper_setup;
+ __le32 cmd_and_length;
+ union {
+ __le32 data;
+ struct {
+ u8 status; /* Descriptor status */
+ u8 hdr_len; /* Header length */
+ __le16 mss; /* Maximum segment size */
+ } fields;
+ } tcp_seg_setup;
+};
+
+/* Offload data descriptor */
+struct e1000_data_desc {
+ __le64 buffer_addr; /* Address of the descriptor's buffer address */
+ union {
+ __le32 data;
+ struct {
+ __le16 length; /* Data buffer length */
+ u8 typ_len_ext;
+ u8 cmd;
+ } flags;
+ } lower;
+ union {
+ __le32 data;
+ struct {
+ u8 status; /* Descriptor status */
+ u8 popts; /* Packet Options */
+ __le16 special;
+ } fields;
+ } upper;
+};
+
+/* Statistics counters collected by the MAC */
+struct e1000_hw_stats {
+ u64 crcerrs;
+ u64 algnerrc;
+ u64 symerrs;
+ u64 rxerrc;
+ u64 mpc;
+ u64 scc;
+ u64 ecol;
+ u64 mcc;
+ u64 latecol;
+ u64 colc;
+ u64 dc;
+ u64 tncrs;
+ u64 sec;
+ u64 cexterr;
+ u64 rlec;
+ u64 xonrxc;
+ u64 xontxc;
+ u64 xoffrxc;
+ u64 xofftxc;
+ u64 fcruc;
+ u64 prc64;
+ u64 prc127;
+ u64 prc255;
+ u64 prc511;
+ u64 prc1023;
+ u64 prc1522;
+ u64 gprc;
+ u64 bprc;
+ u64 mprc;
+ u64 gptc;
+ u64 gorc;
+ u64 gotc;
+ u64 rnbc;
+ u64 ruc;
+ u64 rfc;
+ u64 roc;
+ u64 rjc;
+ u64 mgprc;
+ u64 mgpdc;
+ u64 mgptc;
+ u64 tor;
+ u64 tot;
+ u64 tpr;
+ u64 tpt;
+ u64 ptc64;
+ u64 ptc127;
+ u64 ptc255;
+ u64 ptc511;
+ u64 ptc1023;
+ u64 ptc1522;
+ u64 mptc;
+ u64 bptc;
+ u64 tsctc;
+ u64 tsctfc;
+ u64 iac;
+ u64 icrxptc;
+ u64 icrxatc;
+ u64 ictxptc;
+ u64 ictxatc;
+ u64 ictxqec;
+ u64 ictxqmtc;
+ u64 icrxdmtc;
+ u64 icrxoc;
+ u64 cbtmpc;
+ u64 htdpmc;
+ u64 cbrdpc;
+ u64 cbrmpc;
+ u64 rpthc;
+ u64 hgptc;
+ u64 htcbdpc;
+ u64 hgorc;
+ u64 hgotc;
+ u64 lenerrs;
+ u64 scvpc;
+ u64 hrmpc;
+ u64 doosync;
+ u64 o2bgptc;
+ u64 o2bspc;
+ u64 b2ospc;
+ u64 b2ogprc;
+};
+
+
+struct e1000_phy_stats {
+ u32 idle_errors;
+ u32 receive_errors;
+};
+
+struct e1000_host_mng_dhcp_cookie {
+ u32 signature;
+ u8 status;
+ u8 reserved0;
+ u16 vlan_id;
+ u32 reserved1;
+ u16 reserved2;
+ u8 reserved3;
+ u8 checksum;
+};
+
+/* Host Interface "Rev 1" */
+struct e1000_host_command_header {
+ u8 command_id;
+ u8 command_length;
+ u8 command_options;
+ u8 checksum;
+};
+
+#define E1000_HI_MAX_DATA_LENGTH 252
+struct e1000_host_command_info {
+ struct e1000_host_command_header command_header;
+ u8 command_data[E1000_HI_MAX_DATA_LENGTH];
+};
+
+/* Host Interface "Rev 2" */
+struct e1000_host_mng_command_header {
+ u8 command_id;
+ u8 checksum;
+ u16 reserved1;
+ u16 reserved2;
+ u16 command_length;
+};
+
+#define E1000_HI_MAX_MNG_DATA_LENGTH 0x6F8
+struct e1000_host_mng_command_info {
+ struct e1000_host_mng_command_header command_header;
+ u8 command_data[E1000_HI_MAX_MNG_DATA_LENGTH];
+};
+
+#include "e1000_mac.h"
+#include "e1000_phy.h"
+#include "e1000_nvm.h"
+#include "e1000_manage.h"
+#include "e1000_mbx.h"
+
+/* Function pointers for the MAC. */
+struct e1000_mac_operations {
+ s32 (*init_params)(struct e1000_hw *);
+ s32 (*id_led_init)(struct e1000_hw *);
+ s32 (*blink_led)(struct e1000_hw *);
+ bool (*check_mng_mode)(struct e1000_hw *);
+ s32 (*check_for_link)(struct e1000_hw *);
+ s32 (*cleanup_led)(struct e1000_hw *);
+ void (*clear_hw_cntrs)(struct e1000_hw *);
+ void (*clear_vfta)(struct e1000_hw *);
+ s32 (*get_bus_info)(struct e1000_hw *);
+ void (*set_lan_id)(struct e1000_hw *);
+ s32 (*get_link_up_info)(struct e1000_hw *, u16 *, u16 *);
+ s32 (*led_on)(struct e1000_hw *);
+ s32 (*led_off)(struct e1000_hw *);
+ void (*update_mc_addr_list)(struct e1000_hw *, u8 *, u32);
+ s32 (*reset_hw)(struct e1000_hw *);
+ s32 (*init_hw)(struct e1000_hw *);
+ void (*shutdown_serdes)(struct e1000_hw *);
+ void (*power_up_serdes)(struct e1000_hw *);
+ s32 (*setup_link)(struct e1000_hw *);
+ s32 (*setup_physical_interface)(struct e1000_hw *);
+ s32 (*setup_led)(struct e1000_hw *);
+ void (*write_vfta)(struct e1000_hw *, u32, u32);
+ void (*config_collision_dist)(struct e1000_hw *);
+ void (*rar_set)(struct e1000_hw *, u8*, u32);
+ s32 (*read_mac_addr)(struct e1000_hw *);
+ s32 (*validate_mdi_setting)(struct e1000_hw *);
+ s32 (*get_thermal_sensor_data)(struct e1000_hw *);
+ s32 (*init_thermal_sensor_thresh)(struct e1000_hw *);
+ s32 (*acquire_swfw_sync)(struct e1000_hw *, u16);
+ void (*release_swfw_sync)(struct e1000_hw *, u16);
+};
+
+/* When to use various PHY register access functions:
+ *
+ * Func Caller
+ * Function Does Does When to use
+ * ~~~~~~~~~~~~ ~~~~~ ~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * X_reg L,P,A n/a for simple PHY reg accesses
+ * X_reg_locked P,A L for multiple accesses of different regs
+ * on different pages
+ * X_reg_page A L,P for multiple accesses of different regs
+ * on the same page
+ *
+ * Where X=[read|write], L=locking, P=sets page, A=register access
+ *
+ */
+struct e1000_phy_operations {
+ s32 (*init_params)(struct e1000_hw *);
+ s32 (*acquire)(struct e1000_hw *);
+ s32 (*check_polarity)(struct e1000_hw *);
+ s32 (*check_reset_block)(struct e1000_hw *);
+ s32 (*commit)(struct e1000_hw *);
+ s32 (*force_speed_duplex)(struct e1000_hw *);
+ s32 (*get_cfg_done)(struct e1000_hw *hw);
+ s32 (*get_cable_length)(struct e1000_hw *);
+ s32 (*get_info)(struct e1000_hw *);
+ s32 (*set_page)(struct e1000_hw *, u16);
+ s32 (*read_reg)(struct e1000_hw *, u32, u16 *);
+ s32 (*read_reg_locked)(struct e1000_hw *, u32, u16 *);
+ s32 (*read_reg_page)(struct e1000_hw *, u32, u16 *);
+ void (*release)(struct e1000_hw *);
+ s32 (*reset)(struct e1000_hw *);
+ s32 (*set_d0_lplu_state)(struct e1000_hw *, bool);
+ s32 (*set_d3_lplu_state)(struct e1000_hw *, bool);
+ s32 (*write_reg)(struct e1000_hw *, u32, u16);
+ s32 (*write_reg_locked)(struct e1000_hw *, u32, u16);
+ s32 (*write_reg_page)(struct e1000_hw *, u32, u16);
+ void (*power_up)(struct e1000_hw *);
+ void (*power_down)(struct e1000_hw *);
+ s32 (*read_i2c_byte)(struct e1000_hw *, u8, u8, u8 *);
+ s32 (*write_i2c_byte)(struct e1000_hw *, u8, u8, u8);
+};
+
+/* Function pointers for the NVM. */
+struct e1000_nvm_operations {
+ s32 (*init_params)(struct e1000_hw *);
+ s32 (*acquire)(struct e1000_hw *);
+ s32 (*read)(struct e1000_hw *, u16, u16, u16 *);
+ void (*release)(struct e1000_hw *);
+ void (*reload)(struct e1000_hw *);
+ s32 (*update)(struct e1000_hw *);
+ s32 (*valid_led_default)(struct e1000_hw *, u16 *);
+ s32 (*validate)(struct e1000_hw *);
+ s32 (*write)(struct e1000_hw *, u16, u16, u16 *);
+};
+
+#define E1000_MAX_SENSORS 3
+
+struct e1000_thermal_diode_data {
+ u8 location;
+ u8 temp;
+ u8 caution_thresh;
+ u8 max_op_thresh;
+};
+
+struct e1000_thermal_sensor_data {
+ struct e1000_thermal_diode_data sensor[E1000_MAX_SENSORS];
+};
+
+struct e1000_mac_info {
+ struct e1000_mac_operations ops;
+ u8 addr[ETH_ADDR_LEN];
+ u8 perm_addr[ETH_ADDR_LEN];
+
+ enum e1000_mac_type type;
+
+ u32 collision_delta;
+ u32 ledctl_default;
+ u32 ledctl_mode1;
+ u32 ledctl_mode2;
+ u32 mc_filter_type;
+ u32 tx_packet_delta;
+ u32 txcw;
+
+ u16 current_ifs_val;
+ u16 ifs_max_val;
+ u16 ifs_min_val;
+ u16 ifs_ratio;
+ u16 ifs_step_size;
+ u16 mta_reg_count;
+ u16 uta_reg_count;
+
+ /* Maximum size of the MTA register table in all supported adapters */
+ #define MAX_MTA_REG 128
+ u32 mta_shadow[MAX_MTA_REG];
+ u16 rar_entry_count;
+
+ u8 forced_speed_duplex;
+
+ bool adaptive_ifs;
+ bool has_fwsm;
+ bool arc_subsystem_valid;
+ bool asf_firmware_present;
+ bool autoneg;
+ bool autoneg_failed;
+ bool get_link_status;
+ bool in_ifs_mode;
+ enum e1000_serdes_link_state serdes_link_state;
+ bool serdes_has_link;
+ bool tx_pkt_filtering;
+ struct e1000_thermal_sensor_data thermal_sensor_data;
+};
+
+struct e1000_phy_info {
+ struct e1000_phy_operations ops;
+ enum e1000_phy_type type;
+
+ enum e1000_1000t_rx_status local_rx;
+ enum e1000_1000t_rx_status remote_rx;
+ enum e1000_ms_type ms_type;
+ enum e1000_ms_type original_ms_type;
+ enum e1000_rev_polarity cable_polarity;
+ enum e1000_smart_speed smart_speed;
+
+ u32 addr;
+ u32 id;
+ u32 reset_delay_us; /* in usec */
+ u32 revision;
+
+ enum e1000_media_type media_type;
+
+ u16 autoneg_advertised;
+ u16 autoneg_mask;
+ u16 cable_length;
+ u16 max_cable_length;
+ u16 min_cable_length;
+
+ u8 mdix;
+
+ bool disable_polarity_correction;
+ bool is_mdix;
+ bool polarity_correction;
+ bool reset_disable;
+ bool speed_downgraded;
+ bool autoneg_wait_to_complete;
+};
+
+struct e1000_nvm_info {
+ struct e1000_nvm_operations ops;
+ enum e1000_nvm_type type;
+ enum e1000_nvm_override override;
+
+ u32 flash_bank_size;
+ u32 flash_base_addr;
+
+ u16 word_size;
+ u16 delay_usec;
+ u16 address_bits;
+ u16 opcode_bits;
+ u16 page_size;
+};
+
+struct e1000_bus_info {
+ enum e1000_bus_type type;
+ enum e1000_bus_speed speed;
+ enum e1000_bus_width width;
+
+ u16 func;
+ u16 pci_cmd_word;
+};
+
+struct e1000_fc_info {
+ u32 high_water; /* Flow control high-water mark */
+ u32 low_water; /* Flow control low-water mark */
+ u16 pause_time; /* Flow control pause timer */
+ u16 refresh_time; /* Flow control refresh timer */
+ bool send_xon; /* Flow control send XON */
+ bool strict_ieee; /* Strict IEEE mode */
+ enum e1000_fc_mode current_mode; /* FC mode in effect */
+ enum e1000_fc_mode requested_mode; /* FC mode requested by caller */
+};
+
+struct e1000_mbx_operations {
+ s32 (*init_params)(struct e1000_hw *hw);
+ s32 (*read)(struct e1000_hw *, u32 *, u16, u16);
+ s32 (*write)(struct e1000_hw *, u32 *, u16, u16);
+ s32 (*read_posted)(struct e1000_hw *, u32 *, u16, u16);
+ s32 (*write_posted)(struct e1000_hw *, u32 *, u16, u16);
+ s32 (*check_for_msg)(struct e1000_hw *, u16);
+ s32 (*check_for_ack)(struct e1000_hw *, u16);
+ s32 (*check_for_rst)(struct e1000_hw *, u16);
+};
+
+struct e1000_mbx_stats {
+ u32 msgs_tx;
+ u32 msgs_rx;
+
+ u32 acks;
+ u32 reqs;
+ u32 rsts;
+};
+
+struct e1000_mbx_info {
+ struct e1000_mbx_operations ops;
+ struct e1000_mbx_stats stats;
+ u32 timeout;
+ u32 usec_delay;
+ u16 size;
+};
+
+struct e1000_dev_spec_82575 {
+ bool sgmii_active;
+ bool global_device_reset;
+ bool eee_disable;
+ bool module_plugged;
+ bool clear_semaphore_once;
+ u32 mtu;
+ struct sfp_e1000_flags eth_flags;
+ u8 media_port;
+ bool media_changed;
+};
+
+struct e1000_dev_spec_vf {
+ u32 vf_number;
+ u32 v2p_mailbox;
+};
+
+struct e1000_hw {
+ void *back;
+
+ u8 __iomem *hw_addr;
+ u8 __iomem *flash_address;
+ unsigned long io_base;
+
+ struct e1000_mac_info mac;
+ struct e1000_fc_info fc;
+ struct e1000_phy_info phy;
+ struct e1000_nvm_info nvm;
+ struct e1000_bus_info bus;
+ struct e1000_mbx_info mbx;
+ struct e1000_host_mng_dhcp_cookie mng_cookie;
+
+ union {
+ struct e1000_dev_spec_82575 _82575;
+ struct e1000_dev_spec_vf vf;
+ } dev_spec;
+
+ u16 device_id;
+ u16 subsystem_vendor_id;
+ u16 subsystem_device_id;
+ u16 vendor_id;
+
+ u8 revision_id;
+};
+
+#include "e1000_82575.h"
+#include "e1000_i210.h"
+
+/* These functions must be implemented by drivers */
+s32 e1000_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value);
+s32 e1000_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value);
+
+#endif
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.c
new file mode 100644
index 00000000..1e9f3e6e
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.c
@@ -0,0 +1,909 @@
+/*******************************************************************************
+
+ Intel(R) Gigabit Ethernet Linux driver
+ Copyright(c) 2007-2013 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#include "e1000_api.h"
+
+
+static s32 e1000_acquire_nvm_i210(struct e1000_hw *hw);
+static void e1000_release_nvm_i210(struct e1000_hw *hw);
+static s32 e1000_get_hw_semaphore_i210(struct e1000_hw *hw);
+static s32 e1000_write_nvm_srwr(struct e1000_hw *hw, u16 offset, u16 words,
+ u16 *data);
+static s32 e1000_pool_flash_update_done_i210(struct e1000_hw *hw);
+static s32 e1000_valid_led_default_i210(struct e1000_hw *hw, u16 *data);
+
+/**
+ * e1000_acquire_nvm_i210 - Request for access to EEPROM
+ * @hw: pointer to the HW structure
+ *
+ * Acquire the necessary semaphores for exclusive access to the EEPROM.
+ * Set the EEPROM access request bit and wait for EEPROM access grant bit.
+ * Return successful if access grant bit set, else clear the request for
+ * EEPROM access and return -E1000_ERR_NVM (-1).
+ **/
+static s32 e1000_acquire_nvm_i210(struct e1000_hw *hw)
+{
+ s32 ret_val;
+
+ DEBUGFUNC("e1000_acquire_nvm_i210");
+
+ ret_val = e1000_acquire_swfw_sync_i210(hw, E1000_SWFW_EEP_SM);
+
+ return ret_val;
+}
+
+/**
+ * e1000_release_nvm_i210 - Release exclusive access to EEPROM
+ * @hw: pointer to the HW structure
+ *
+ * Stop any current commands to the EEPROM and clear the EEPROM request bit,
+ * then release the semaphores acquired.
+ **/
+static void e1000_release_nvm_i210(struct e1000_hw *hw)
+{
+ DEBUGFUNC("e1000_release_nvm_i210");
+
+ e1000_release_swfw_sync_i210(hw, E1000_SWFW_EEP_SM);
+}
+
+/**
+ * e1000_acquire_swfw_sync_i210 - Acquire SW/FW semaphore
+ * @hw: pointer to the HW structure
+ * @mask: specifies which semaphore to acquire
+ *
+ * Acquire the SW/FW semaphore to access the PHY or NVM. The mask
+ * will also specify which port we're acquiring the lock for.
+ **/
+s32 e1000_acquire_swfw_sync_i210(struct e1000_hw *hw, u16 mask)
+{
+ u32 swfw_sync;
+ u32 swmask = mask;
+ u32 fwmask = mask << 16;
+ s32 ret_val = E1000_SUCCESS;
+ s32 i = 0, timeout = 200; /* FIXME: find real value to use here */
+
+ DEBUGFUNC("e1000_acquire_swfw_sync_i210");
+
+ while (i < timeout) {
+ if (e1000_get_hw_semaphore_i210(hw)) {
+ ret_val = -E1000_ERR_SWFW_SYNC;
+ goto out;
+ }
+
+ swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC);
+ if (!(swfw_sync & (fwmask | swmask)))
+ break;
+
+ /*
+ * Firmware currently using resource (fwmask)
+ * or other software thread using resource (swmask)
+ */
+ e1000_put_hw_semaphore_generic(hw);
+ msec_delay_irq(5);
+ i++;
+ }
+
+ if (i == timeout) {
+ DEBUGOUT("Driver can't access resource, SW_FW_SYNC timeout.\n");
+ ret_val = -E1000_ERR_SWFW_SYNC;
+ goto out;
+ }
+
+ swfw_sync |= swmask;
+ E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync);
+
+ e1000_put_hw_semaphore_generic(hw);
+
+out:
+ return ret_val;
+}
+
+/**
+ * e1000_release_swfw_sync_i210 - Release SW/FW semaphore
+ * @hw: pointer to the HW structure
+ * @mask: specifies which semaphore to acquire
+ *
+ * Release the SW/FW semaphore used to access the PHY or NVM. The mask
+ * will also specify which port we're releasing the lock for.
+ **/
+void e1000_release_swfw_sync_i210(struct e1000_hw *hw, u16 mask)
+{
+ u32 swfw_sync;
+
+ DEBUGFUNC("e1000_release_swfw_sync_i210");
+
+ while (e1000_get_hw_semaphore_i210(hw) != E1000_SUCCESS)
+ ; /* Empty */
+
+ swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC);
+ swfw_sync &= ~mask;
+ E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync);
+
+ e1000_put_hw_semaphore_generic(hw);
+}
+
+/**
+ * e1000_get_hw_semaphore_i210 - Acquire hardware semaphore
+ * @hw: pointer to the HW structure
+ *
+ * Acquire the HW semaphore to access the PHY or NVM
+ **/
+static s32 e1000_get_hw_semaphore_i210(struct e1000_hw *hw)
+{
+ u32 swsm;
+ s32 timeout = hw->nvm.word_size + 1;
+ s32 i = 0;
+
+ DEBUGFUNC("e1000_get_hw_semaphore_i210");
+
+ /* Get the SW semaphore */
+ while (i < timeout) {
+ swsm = E1000_READ_REG(hw, E1000_SWSM);
+ if (!(swsm & E1000_SWSM_SMBI))
+ break;
+
+ usec_delay(50);
+ i++;
+ }
+
+ if (i == timeout) {
+ /* In rare circumstances, the SW semaphore may already be held
+ * unintentionally. Clear the semaphore once before giving up.
+ */
+ if (hw->dev_spec._82575.clear_semaphore_once) {
+ hw->dev_spec._82575.clear_semaphore_once = false;
+ e1000_put_hw_semaphore_generic(hw);
+ for (i = 0; i < timeout; i++) {
+ swsm = E1000_READ_REG(hw, E1000_SWSM);
+ if (!(swsm & E1000_SWSM_SMBI))
+ break;
+
+ usec_delay(50);
+ }
+ }
+
+ /* If we do not have the semaphore here, we have to give up. */
+ if (i == timeout) {
+ DEBUGOUT("Driver can't access device - SMBI bit is set.\n");
+ return -E1000_ERR_NVM;
+ }
+ }
+
+ /* Get the FW semaphore. */
+ for (i = 0; i < timeout; i++) {
+ swsm = E1000_READ_REG(hw, E1000_SWSM);
+ E1000_WRITE_REG(hw, E1000_SWSM, swsm | E1000_SWSM_SWESMBI);
+
+ /* Semaphore acquired if bit latched */
+ if (E1000_READ_REG(hw, E1000_SWSM) & E1000_SWSM_SWESMBI)
+ break;
+
+ usec_delay(50);
+ }
+
+ if (i == timeout) {
+ /* Release semaphores */
+ e1000_put_hw_semaphore_generic(hw);
+ DEBUGOUT("Driver can't access the NVM\n");
+ return -E1000_ERR_NVM;
+ }
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_read_nvm_srrd_i210 - Reads Shadow Ram using EERD register
+ * @hw: pointer to the HW structure
+ * @offset: offset of word in the Shadow Ram to read
+ * @words: number of words to read
+ * @data: word read from the Shadow Ram
+ *
+ * Reads a 16 bit word from the Shadow Ram using the EERD register.
+ * Uses necessary synchronization semaphores.
+ **/
+s32 e1000_read_nvm_srrd_i210(struct e1000_hw *hw, u16 offset, u16 words,
+ u16 *data)
+{
+ s32 status = E1000_SUCCESS;
+ u16 i, count;
+
+ DEBUGFUNC("e1000_read_nvm_srrd_i210");
+
+ /* We cannot hold synchronization semaphores for too long,
+ * because of forceful takeover procedure. However it is more efficient
+ * to read in bursts than synchronizing access for each word. */
+ for (i = 0; i < words; i += E1000_EERD_EEWR_MAX_COUNT) {
+ count = (words - i) / E1000_EERD_EEWR_MAX_COUNT > 0 ?
+ E1000_EERD_EEWR_MAX_COUNT : (words - i);
+ if (hw->nvm.ops.acquire(hw) == E1000_SUCCESS) {
+ status = e1000_read_nvm_eerd(hw, offset, count,
+ data + i);
+ hw->nvm.ops.release(hw);
+ } else {
+ status = E1000_ERR_SWFW_SYNC;
+ }
+
+ if (status != E1000_SUCCESS)
+ break;
+ }
+
+ return status;
+}
+
+/**
+ * e1000_write_nvm_srwr_i210 - Write to Shadow RAM using EEWR
+ * @hw: pointer to the HW structure
+ * @offset: offset within the Shadow RAM to be written to
+ * @words: number of words to write
+ * @data: 16 bit word(s) to be written to the Shadow RAM
+ *
+ * Writes data to Shadow RAM at offset using EEWR register.
+ *
+ * If e1000_update_nvm_checksum is not called after this function , the
+ * data will not be committed to FLASH and also Shadow RAM will most likely
+ * contain an invalid checksum.
+ *
+ * If error code is returned, data and Shadow RAM may be inconsistent - buffer
+ * partially written.
+ **/
+s32 e1000_write_nvm_srwr_i210(struct e1000_hw *hw, u16 offset, u16 words,
+ u16 *data)
+{
+ s32 status = E1000_SUCCESS;
+ u16 i, count;
+
+ DEBUGFUNC("e1000_write_nvm_srwr_i210");
+
+ /* We cannot hold synchronization semaphores for too long,
+ * because of forceful takeover procedure. However it is more efficient
+ * to write in bursts than synchronizing access for each word. */
+ for (i = 0; i < words; i += E1000_EERD_EEWR_MAX_COUNT) {
+ count = (words - i) / E1000_EERD_EEWR_MAX_COUNT > 0 ?
+ E1000_EERD_EEWR_MAX_COUNT : (words - i);
+ if (hw->nvm.ops.acquire(hw) == E1000_SUCCESS) {
+ status = e1000_write_nvm_srwr(hw, offset, count,
+ data + i);
+ hw->nvm.ops.release(hw);
+ } else {
+ status = E1000_ERR_SWFW_SYNC;
+ }
+
+ if (status != E1000_SUCCESS)
+ break;
+ }
+
+ return status;
+}
+
+/**
+ * e1000_write_nvm_srwr - Write to Shadow Ram using EEWR
+ * @hw: pointer to the HW structure
+ * @offset: offset within the Shadow Ram to be written to
+ * @words: number of words to write
+ * @data: 16 bit word(s) to be written to the Shadow Ram
+ *
+ * Writes data to Shadow Ram at offset using EEWR register.
+ *
+ * If e1000_update_nvm_checksum is not called after this function , the
+ * Shadow Ram will most likely contain an invalid checksum.
+ **/
+static s32 e1000_write_nvm_srwr(struct e1000_hw *hw, u16 offset, u16 words,
+ u16 *data)
+{
+ struct e1000_nvm_info *nvm = &hw->nvm;
+ u32 i, k, eewr = 0;
+ u32 attempts = 100000;
+ s32 ret_val = E1000_SUCCESS;
+
+ DEBUGFUNC("e1000_write_nvm_srwr");
+
+ /*
+ * A check for invalid values: offset too large, too many words,
+ * too many words for the offset, and not enough words.
+ */
+ if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) ||
+ (words == 0)) {
+ DEBUGOUT("nvm parameter(s) out of bounds\n");
+ ret_val = -E1000_ERR_NVM;
+ goto out;
+ }
+
+ for (i = 0; i < words; i++) {
+ eewr = ((offset+i) << E1000_NVM_RW_ADDR_SHIFT) |
+ (data[i] << E1000_NVM_RW_REG_DATA) |
+ E1000_NVM_RW_REG_START;
+
+ E1000_WRITE_REG(hw, E1000_SRWR, eewr);
+
+ for (k = 0; k < attempts; k++) {
+ if (E1000_NVM_RW_REG_DONE &
+ E1000_READ_REG(hw, E1000_SRWR)) {
+ ret_val = E1000_SUCCESS;
+ break;
+ }
+ usec_delay(5);
+ }
+
+ if (ret_val != E1000_SUCCESS) {
+ DEBUGOUT("Shadow RAM write EEWR timed out\n");
+ break;
+ }
+ }
+
+out:
+ return ret_val;
+}
+
+/** e1000_read_invm_word_i210 - Reads OTP
+ * @hw: pointer to the HW structure
+ * @address: the word address (aka eeprom offset) to read
+ * @data: pointer to the data read
+ *
+ * Reads 16-bit words from the OTP. Return error when the word is not
+ * stored in OTP.
+ **/
+static s32 e1000_read_invm_word_i210(struct e1000_hw *hw, u8 address, u16 *data)
+{
+ s32 status = -E1000_ERR_INVM_VALUE_NOT_FOUND;
+ u32 invm_dword;
+ u16 i;
+ u8 record_type, word_address;
+
+ DEBUGFUNC("e1000_read_invm_word_i210");
+
+ for (i = 0; i < E1000_INVM_SIZE; i++) {
+ invm_dword = E1000_READ_REG(hw, E1000_INVM_DATA_REG(i));
+ /* Get record type */
+ record_type = INVM_DWORD_TO_RECORD_TYPE(invm_dword);
+ if (record_type == E1000_INVM_UNINITIALIZED_STRUCTURE)
+ break;
+ if (record_type == E1000_INVM_CSR_AUTOLOAD_STRUCTURE)
+ i += E1000_INVM_CSR_AUTOLOAD_DATA_SIZE_IN_DWORDS;
+ if (record_type == E1000_INVM_RSA_KEY_SHA256_STRUCTURE)
+ i += E1000_INVM_RSA_KEY_SHA256_DATA_SIZE_IN_DWORDS;
+ if (record_type == E1000_INVM_WORD_AUTOLOAD_STRUCTURE) {
+ word_address = INVM_DWORD_TO_WORD_ADDRESS(invm_dword);
+ if (word_address == address) {
+ *data = INVM_DWORD_TO_WORD_DATA(invm_dword);
+ DEBUGOUT2("Read INVM Word 0x%02x = %x",
+ address, *data);
+ status = E1000_SUCCESS;
+ break;
+ }
+ }
+ }
+ if (status != E1000_SUCCESS)
+ DEBUGOUT1("Requested word 0x%02x not found in OTP\n", address);
+ return status;
+}
+
+/** e1000_read_invm_i210 - Read invm wrapper function for I210/I211
+ * @hw: pointer to the HW structure
+ * @address: the word address (aka eeprom offset) to read
+ * @data: pointer to the data read
+ *
+ * Wrapper function to return data formerly found in the NVM.
+ **/
+static s32 e1000_read_invm_i210(struct e1000_hw *hw, u16 offset,
+ u16 E1000_UNUSEDARG words, u16 *data)
+{
+ s32 ret_val = E1000_SUCCESS;
+
+ DEBUGFUNC("e1000_read_invm_i210");
+
+ /* Only the MAC addr is required to be present in the iNVM */
+ switch (offset) {
+ case NVM_MAC_ADDR:
+ ret_val = e1000_read_invm_word_i210(hw, (u8)offset, &data[0]);
+ ret_val |= e1000_read_invm_word_i210(hw, (u8)offset+1,
+ &data[1]);
+ ret_val |= e1000_read_invm_word_i210(hw, (u8)offset+2,
+ &data[2]);
+ if (ret_val != E1000_SUCCESS)
+ DEBUGOUT("MAC Addr not found in iNVM\n");
+ break;
+ case NVM_INIT_CTRL_2:
+ ret_val = e1000_read_invm_word_i210(hw, (u8)offset, data);
+ if (ret_val != E1000_SUCCESS) {
+ *data = NVM_INIT_CTRL_2_DEFAULT_I211;
+ ret_val = E1000_SUCCESS;
+ }
+ break;
+ case NVM_INIT_CTRL_4:
+ ret_val = e1000_read_invm_word_i210(hw, (u8)offset, data);
+ if (ret_val != E1000_SUCCESS) {
+ *data = NVM_INIT_CTRL_4_DEFAULT_I211;
+ ret_val = E1000_SUCCESS;
+ }
+ break;
+ case NVM_LED_1_CFG:
+ ret_val = e1000_read_invm_word_i210(hw, (u8)offset, data);
+ if (ret_val != E1000_SUCCESS) {
+ *data = NVM_LED_1_CFG_DEFAULT_I211;
+ ret_val = E1000_SUCCESS;
+ }
+ break;
+ case NVM_LED_0_2_CFG:
+ ret_val = e1000_read_invm_word_i210(hw, (u8)offset, data);
+ if (ret_val != E1000_SUCCESS) {
+ *data = NVM_LED_0_2_CFG_DEFAULT_I211;
+ ret_val = E1000_SUCCESS;
+ }
+ break;
+ case NVM_ID_LED_SETTINGS:
+ ret_val = e1000_read_invm_word_i210(hw, (u8)offset, data);
+ if (ret_val != E1000_SUCCESS) {
+ *data = ID_LED_RESERVED_FFFF;
+ ret_val = E1000_SUCCESS;
+ }
+ break;
+ case NVM_SUB_DEV_ID:
+ *data = hw->subsystem_device_id;
+ break;
+ case NVM_SUB_VEN_ID:
+ *data = hw->subsystem_vendor_id;
+ break;
+ case NVM_DEV_ID:
+ *data = hw->device_id;
+ break;
+ case NVM_VEN_ID:
+ *data = hw->vendor_id;
+ break;
+ default:
+ DEBUGOUT1("NVM word 0x%02x is not mapped.\n", offset);
+ *data = NVM_RESERVED_WORD;
+ break;
+ }
+ return ret_val;
+}
+
+/**
+ * e1000_read_invm_version - Reads iNVM version and image type
+ * @hw: pointer to the HW structure
+ * @invm_ver: version structure for the version read
+ *
+ * Reads iNVM version and image type.
+ **/
+s32 e1000_read_invm_version(struct e1000_hw *hw,
+ struct e1000_fw_version *invm_ver)
+{
+ u32 *record = NULL;
+ u32 *next_record = NULL;
+ u32 i = 0;
+ u32 invm_dword = 0;
+ u32 invm_blocks = E1000_INVM_SIZE - (E1000_INVM_ULT_BYTES_SIZE /
+ E1000_INVM_RECORD_SIZE_IN_BYTES);
+ u32 buffer[E1000_INVM_SIZE];
+ s32 status = -E1000_ERR_INVM_VALUE_NOT_FOUND;
+ u16 version = 0;
+
+ DEBUGFUNC("e1000_read_invm_version");
+
+ /* Read iNVM memory */
+ for (i = 0; i < E1000_INVM_SIZE; i++) {
+ invm_dword = E1000_READ_REG(hw, E1000_INVM_DATA_REG(i));
+ buffer[i] = invm_dword;
+ }
+
+ /* Read version number */
+ for (i = 1; i < invm_blocks; i++) {
+ record = &buffer[invm_blocks - i];
+ next_record = &buffer[invm_blocks - i + 1];
+
+ /* Check if we have first version location used */
+ if ((i == 1) && ((*record & E1000_INVM_VER_FIELD_ONE) == 0)) {
+ version = 0;
+ status = E1000_SUCCESS;
+ break;
+ }
+ /* Check if we have second version location used */
+ else if ((i == 1) &&
+ ((*record & E1000_INVM_VER_FIELD_TWO) == 0)) {
+ version = (*record & E1000_INVM_VER_FIELD_ONE) >> 3;
+ status = E1000_SUCCESS;
+ break;
+ }
+ /*
+ * Check if we have odd version location
+ * used and it is the last one used
+ */
+ else if ((((*record & E1000_INVM_VER_FIELD_ONE) == 0) &&
+ ((*record & 0x3) == 0)) || (((*record & 0x3) != 0) &&
+ (i != 1))) {
+ version = (*next_record & E1000_INVM_VER_FIELD_TWO)
+ >> 13;
+ status = E1000_SUCCESS;
+ break;
+ }
+ /*
+ * Check if we have even version location
+ * used and it is the last one used
+ */
+ else if (((*record & E1000_INVM_VER_FIELD_TWO) == 0) &&
+ ((*record & 0x3) == 0)) {
+ version = (*record & E1000_INVM_VER_FIELD_ONE) >> 3;
+ status = E1000_SUCCESS;
+ break;
+ }
+ }
+
+ if (status == E1000_SUCCESS) {
+ invm_ver->invm_major = (version & E1000_INVM_MAJOR_MASK)
+ >> E1000_INVM_MAJOR_SHIFT;
+ invm_ver->invm_minor = version & E1000_INVM_MINOR_MASK;
+ }
+ /* Read Image Type */
+ for (i = 1; i < invm_blocks; i++) {
+ record = &buffer[invm_blocks - i];
+ next_record = &buffer[invm_blocks - i + 1];
+
+ /* Check if we have image type in first location used */
+ if ((i == 1) && ((*record & E1000_INVM_IMGTYPE_FIELD) == 0)) {
+ invm_ver->invm_img_type = 0;
+ status = E1000_SUCCESS;
+ break;
+ }
+ /* Check if we have image type in first location used */
+ else if ((((*record & 0x3) == 0) &&
+ ((*record & E1000_INVM_IMGTYPE_FIELD) == 0)) ||
+ ((((*record & 0x3) != 0) && (i != 1)))) {
+ invm_ver->invm_img_type =
+ (*next_record & E1000_INVM_IMGTYPE_FIELD) >> 23;
+ status = E1000_SUCCESS;
+ break;
+ }
+ }
+ return status;
+}
+
+/**
+ * e1000_validate_nvm_checksum_i210 - Validate EEPROM checksum
+ * @hw: pointer to the HW structure
+ *
+ * Calculates the EEPROM checksum by reading/adding each word of the EEPROM
+ * and then verifies that the sum of the EEPROM is equal to 0xBABA.
+ **/
+s32 e1000_validate_nvm_checksum_i210(struct e1000_hw *hw)
+{
+ s32 status = E1000_SUCCESS;
+ s32 (*read_op_ptr)(struct e1000_hw *, u16, u16, u16 *);
+
+ DEBUGFUNC("e1000_validate_nvm_checksum_i210");
+
+ if (hw->nvm.ops.acquire(hw) == E1000_SUCCESS) {
+
+ /*
+ * Replace the read function with semaphore grabbing with
+ * the one that skips this for a while.
+ * We have semaphore taken already here.
+ */
+ read_op_ptr = hw->nvm.ops.read;
+ hw->nvm.ops.read = e1000_read_nvm_eerd;
+
+ status = e1000_validate_nvm_checksum_generic(hw);
+
+ /* Revert original read operation. */
+ hw->nvm.ops.read = read_op_ptr;
+
+ hw->nvm.ops.release(hw);
+ } else {
+ status = E1000_ERR_SWFW_SYNC;
+ }
+
+ return status;
+}
+
+
+/**
+ * e1000_update_nvm_checksum_i210 - Update EEPROM checksum
+ * @hw: pointer to the HW structure
+ *
+ * Updates the EEPROM checksum by reading/adding each word of the EEPROM
+ * up to the checksum. Then calculates the EEPROM checksum and writes the
+ * value to the EEPROM. Next commit EEPROM data onto the Flash.
+ **/
+s32 e1000_update_nvm_checksum_i210(struct e1000_hw *hw)
+{
+ s32 ret_val = E1000_SUCCESS;
+ u16 checksum = 0;
+ u16 i, nvm_data;
+
+ DEBUGFUNC("e1000_update_nvm_checksum_i210");
+
+ /*
+ * Read the first word from the EEPROM. If this times out or fails, do
+ * not continue or we could be in for a very long wait while every
+ * EEPROM read fails
+ */
+ ret_val = e1000_read_nvm_eerd(hw, 0, 1, &nvm_data);
+ if (ret_val != E1000_SUCCESS) {
+ DEBUGOUT("EEPROM read failed\n");
+ goto out;
+ }
+
+ if (hw->nvm.ops.acquire(hw) == E1000_SUCCESS) {
+ /*
+ * Do not use hw->nvm.ops.write, hw->nvm.ops.read
+ * because we do not want to take the synchronization
+ * semaphores twice here.
+ */
+
+ for (i = 0; i < NVM_CHECKSUM_REG; i++) {
+ ret_val = e1000_read_nvm_eerd(hw, i, 1, &nvm_data);
+ if (ret_val) {
+ hw->nvm.ops.release(hw);
+ DEBUGOUT("NVM Read Error while updating checksum.\n");
+ goto out;
+ }
+ checksum += nvm_data;
+ }
+ checksum = (u16) NVM_SUM - checksum;
+ ret_val = e1000_write_nvm_srwr(hw, NVM_CHECKSUM_REG, 1,
+ &checksum);
+ if (ret_val != E1000_SUCCESS) {
+ hw->nvm.ops.release(hw);
+ DEBUGOUT("NVM Write Error while updating checksum.\n");
+ goto out;
+ }
+
+ hw->nvm.ops.release(hw);
+
+ ret_val = e1000_update_flash_i210(hw);
+ } else {
+ ret_val = E1000_ERR_SWFW_SYNC;
+ }
+out:
+ return ret_val;
+}
+
+/**
+ * e1000_get_flash_presence_i210 - Check if flash device is detected.
+ * @hw: pointer to the HW structure
+ *
+ **/
+bool e1000_get_flash_presence_i210(struct e1000_hw *hw)
+{
+ u32 eec = 0;
+ bool ret_val = false;
+
+ DEBUGFUNC("e1000_get_flash_presence_i210");
+
+ eec = E1000_READ_REG(hw, E1000_EECD);
+
+ if (eec & E1000_EECD_FLASH_DETECTED_I210)
+ ret_val = true;
+
+ return ret_val;
+}
+
+/**
+ * e1000_update_flash_i210 - Commit EEPROM to the flash
+ * @hw: pointer to the HW structure
+ *
+ **/
+s32 e1000_update_flash_i210(struct e1000_hw *hw)
+{
+ s32 ret_val = E1000_SUCCESS;
+ u32 flup;
+
+ DEBUGFUNC("e1000_update_flash_i210");
+
+ ret_val = e1000_pool_flash_update_done_i210(hw);
+ if (ret_val == -E1000_ERR_NVM) {
+ DEBUGOUT("Flash update time out\n");
+ goto out;
+ }
+
+ flup = E1000_READ_REG(hw, E1000_EECD) | E1000_EECD_FLUPD_I210;
+ E1000_WRITE_REG(hw, E1000_EECD, flup);
+
+ ret_val = e1000_pool_flash_update_done_i210(hw);
+ if (ret_val == E1000_SUCCESS)
+ DEBUGOUT("Flash update complete\n");
+ else
+ DEBUGOUT("Flash update time out\n");
+
+out:
+ return ret_val;
+}
+
+/**
+ * e1000_pool_flash_update_done_i210 - Pool FLUDONE status.
+ * @hw: pointer to the HW structure
+ *
+ **/
+s32 e1000_pool_flash_update_done_i210(struct e1000_hw *hw)
+{
+ s32 ret_val = -E1000_ERR_NVM;
+ u32 i, reg;
+
+ DEBUGFUNC("e1000_pool_flash_update_done_i210");
+
+ for (i = 0; i < E1000_FLUDONE_ATTEMPTS; i++) {
+ reg = E1000_READ_REG(hw, E1000_EECD);
+ if (reg & E1000_EECD_FLUDONE_I210) {
+ ret_val = E1000_SUCCESS;
+ break;
+ }
+ usec_delay(5);
+ }
+
+ return ret_val;
+}
+
+/**
+ * e1000_init_nvm_params_i210 - Initialize i210 NVM function pointers
+ * @hw: pointer to the HW structure
+ *
+ * Initialize the i210/i211 NVM parameters and function pointers.
+ **/
+static s32 e1000_init_nvm_params_i210(struct e1000_hw *hw)
+{
+ s32 ret_val = E1000_SUCCESS;
+ struct e1000_nvm_info *nvm = &hw->nvm;
+
+ DEBUGFUNC("e1000_init_nvm_params_i210");
+
+ ret_val = e1000_init_nvm_params_82575(hw);
+ nvm->ops.acquire = e1000_acquire_nvm_i210;
+ nvm->ops.release = e1000_release_nvm_i210;
+ nvm->ops.valid_led_default = e1000_valid_led_default_i210;
+ if (e1000_get_flash_presence_i210(hw)) {
+ hw->nvm.type = e1000_nvm_flash_hw;
+ nvm->ops.read = e1000_read_nvm_srrd_i210;
+ nvm->ops.write = e1000_write_nvm_srwr_i210;
+ nvm->ops.validate = e1000_validate_nvm_checksum_i210;
+ nvm->ops.update = e1000_update_nvm_checksum_i210;
+ } else {
+ hw->nvm.type = e1000_nvm_invm;
+ nvm->ops.read = e1000_read_invm_i210;
+ nvm->ops.write = e1000_null_write_nvm;
+ nvm->ops.validate = e1000_null_ops_generic;
+ nvm->ops.update = e1000_null_ops_generic;
+ }
+ return ret_val;
+}
+
+/**
+ * e1000_init_function_pointers_i210 - Init func ptrs.
+ * @hw: pointer to the HW structure
+ *
+ * Called to initialize all function pointers and parameters.
+ **/
+void e1000_init_function_pointers_i210(struct e1000_hw *hw)
+{
+ e1000_init_function_pointers_82575(hw);
+ hw->nvm.ops.init_params = e1000_init_nvm_params_i210;
+
+ return;
+}
+
+/**
+ * e1000_valid_led_default_i210 - Verify a valid default LED config
+ * @hw: pointer to the HW structure
+ * @data: pointer to the NVM (EEPROM)
+ *
+ * Read the EEPROM for the current default LED configuration. If the
+ * LED configuration is not valid, set to a valid LED configuration.
+ **/
+static s32 e1000_valid_led_default_i210(struct e1000_hw *hw, u16 *data)
+{
+ s32 ret_val;
+
+ DEBUGFUNC("e1000_valid_led_default_i210");
+
+ ret_val = hw->nvm.ops.read(hw, NVM_ID_LED_SETTINGS, 1, data);
+ if (ret_val) {
+ DEBUGOUT("NVM Read Error\n");
+ goto out;
+ }
+
+ if (*data == ID_LED_RESERVED_0000 || *data == ID_LED_RESERVED_FFFF) {
+ switch (hw->phy.media_type) {
+ case e1000_media_type_internal_serdes:
+ *data = ID_LED_DEFAULT_I210_SERDES;
+ break;
+ case e1000_media_type_copper:
+ default:
+ *data = ID_LED_DEFAULT_I210;
+ break;
+ }
+ }
+out:
+ return ret_val;
+}
+
+/**
+ * __e1000_access_xmdio_reg - Read/write XMDIO register
+ * @hw: pointer to the HW structure
+ * @address: XMDIO address to program
+ * @dev_addr: device address to program
+ * @data: pointer to value to read/write from/to the XMDIO address
+ * @read: boolean flag to indicate read or write
+ **/
+static s32 __e1000_access_xmdio_reg(struct e1000_hw *hw, u16 address,
+ u8 dev_addr, u16 *data, bool read)
+{
+ s32 ret_val = E1000_SUCCESS;
+
+ DEBUGFUNC("__e1000_access_xmdio_reg");
+
+ ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAC, dev_addr);
+ if (ret_val)
+ return ret_val;
+
+ ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAAD, address);
+ if (ret_val)
+ return ret_val;
+
+ ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAC, E1000_MMDAC_FUNC_DATA |
+ dev_addr);
+ if (ret_val)
+ return ret_val;
+
+ if (read)
+ ret_val = hw->phy.ops.read_reg(hw, E1000_MMDAAD, data);
+ else
+ ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAAD, *data);
+ if (ret_val)
+ return ret_val;
+
+ /* Recalibrate the device back to 0 */
+ ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAC, 0);
+ if (ret_val)
+ return ret_val;
+
+ return ret_val;
+}
+
+/**
+ * e1000_read_xmdio_reg - Read XMDIO register
+ * @hw: pointer to the HW structure
+ * @addr: XMDIO address to program
+ * @dev_addr: device address to program
+ * @data: value to be read from the EMI address
+ **/
+s32 e1000_read_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr, u16 *data)
+{
+ DEBUGFUNC("e1000_read_xmdio_reg");
+
+ return __e1000_access_xmdio_reg(hw, addr, dev_addr, data, true);
+}
+
+/**
+ * e1000_write_xmdio_reg - Write XMDIO register
+ * @hw: pointer to the HW structure
+ * @addr: XMDIO address to program
+ * @dev_addr: device address to program
+ * @data: value to be written to the XMDIO address
+ **/
+s32 e1000_write_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr, u16 data)
+{
+ DEBUGFUNC("e1000_read_xmdio_reg");
+
+ return __e1000_access_xmdio_reg(hw, addr, dev_addr, &data, false);
+}
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.h
new file mode 100644
index 00000000..57b2eb56
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.h
@@ -0,0 +1,91 @@
+/*******************************************************************************
+
+ Intel(R) Gigabit Ethernet Linux driver
+ Copyright(c) 2007-2013 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _E1000_I210_H_
+#define _E1000_I210_H_
+
+bool e1000_get_flash_presence_i210(struct e1000_hw *hw);
+s32 e1000_update_flash_i210(struct e1000_hw *hw);
+s32 e1000_update_nvm_checksum_i210(struct e1000_hw *hw);
+s32 e1000_validate_nvm_checksum_i210(struct e1000_hw *hw);
+s32 e1000_write_nvm_srwr_i210(struct e1000_hw *hw, u16 offset,
+ u16 words, u16 *data);
+s32 e1000_read_nvm_srrd_i210(struct e1000_hw *hw, u16 offset,
+ u16 words, u16 *data);
+s32 e1000_read_invm_version(struct e1000_hw *hw,
+ struct e1000_fw_version *invm_ver);
+s32 e1000_acquire_swfw_sync_i210(struct e1000_hw *hw, u16 mask);
+void e1000_release_swfw_sync_i210(struct e1000_hw *hw, u16 mask);
+s32 e1000_read_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr,
+ u16 *data);
+s32 e1000_write_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr,
+ u16 data);
+
+#define E1000_STM_OPCODE 0xDB00
+#define E1000_EEPROM_FLASH_SIZE_WORD 0x11
+
+#define INVM_DWORD_TO_RECORD_TYPE(invm_dword) \
+ (u8)((invm_dword) & 0x7)
+#define INVM_DWORD_TO_WORD_ADDRESS(invm_dword) \
+ (u8)(((invm_dword) & 0x0000FE00) >> 9)
+#define INVM_DWORD_TO_WORD_DATA(invm_dword) \
+ (u16)(((invm_dword) & 0xFFFF0000) >> 16)
+
+enum E1000_INVM_STRUCTURE_TYPE {
+ E1000_INVM_UNINITIALIZED_STRUCTURE = 0x00,
+ E1000_INVM_WORD_AUTOLOAD_STRUCTURE = 0x01,
+ E1000_INVM_CSR_AUTOLOAD_STRUCTURE = 0x02,
+ E1000_INVM_PHY_REGISTER_AUTOLOAD_STRUCTURE = 0x03,
+ E1000_INVM_RSA_KEY_SHA256_STRUCTURE = 0x04,
+ E1000_INVM_INVALIDATED_STRUCTURE = 0x0F,
+};
+
+#define E1000_INVM_RSA_KEY_SHA256_DATA_SIZE_IN_DWORDS 8
+#define E1000_INVM_CSR_AUTOLOAD_DATA_SIZE_IN_DWORDS 1
+#define E1000_INVM_ULT_BYTES_SIZE 8
+#define E1000_INVM_RECORD_SIZE_IN_BYTES 4
+#define E1000_INVM_VER_FIELD_ONE 0x1FF8
+#define E1000_INVM_VER_FIELD_TWO 0x7FE000
+#define E1000_INVM_IMGTYPE_FIELD 0x1F800000
+
+#define E1000_INVM_MAJOR_MASK 0x3F0
+#define E1000_INVM_MINOR_MASK 0xF
+#define E1000_INVM_MAJOR_SHIFT 4
+
+#define ID_LED_DEFAULT_I210 ((ID_LED_OFF1_ON2 << 8) | \
+ (ID_LED_DEF1_DEF2 << 4) | \
+ (ID_LED_OFF1_OFF2))
+#define ID_LED_DEFAULT_I210_SERDES ((ID_LED_DEF1_DEF2 << 8) | \
+ (ID_LED_DEF1_DEF2 << 4) | \
+ (ID_LED_OFF1_ON2))
+
+/* NVM offset defaults for I211 devices */
+#define NVM_INIT_CTRL_2_DEFAULT_I211 0X7243
+#define NVM_INIT_CTRL_4_DEFAULT_I211 0x00C1
+#define NVM_LED_1_CFG_DEFAULT_I211 0x0184
+#define NVM_LED_0_2_CFG_DEFAULT_I211 0x200C
+#endif
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.c
new file mode 100644
index 00000000..4ee59ba9
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.c
@@ -0,0 +1,2096 @@
+/*******************************************************************************
+
+ Intel(R) Gigabit Ethernet Linux driver
+ Copyright(c) 2007-2013 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#include "e1000_api.h"
+
+static s32 e1000_validate_mdi_setting_generic(struct e1000_hw *hw);
+static void e1000_set_lan_id_multi_port_pcie(struct e1000_hw *hw);
+static void e1000_config_collision_dist_generic(struct e1000_hw *hw);
+static void e1000_rar_set_generic(struct e1000_hw *hw, u8 *addr, u32 index);
+
+/**
+ * e1000_init_mac_ops_generic - Initialize MAC function pointers
+ * @hw: pointer to the HW structure
+ *
+ * Setups up the function pointers to no-op functions
+ **/
+void e1000_init_mac_ops_generic(struct e1000_hw *hw)
+{
+ struct e1000_mac_info *mac = &hw->mac;
+ DEBUGFUNC("e1000_init_mac_ops_generic");
+
+ /* General Setup */
+ mac->ops.init_params = e1000_null_ops_generic;
+ mac->ops.init_hw = e1000_null_ops_generic;
+ mac->ops.reset_hw = e1000_null_ops_generic;
+ mac->ops.setup_physical_interface = e1000_null_ops_generic;
+ mac->ops.get_bus_info = e1000_null_ops_generic;
+ mac->ops.set_lan_id = e1000_set_lan_id_multi_port_pcie;
+ mac->ops.read_mac_addr = e1000_read_mac_addr_generic;
+ mac->ops.config_collision_dist = e1000_config_collision_dist_generic;
+ mac->ops.clear_hw_cntrs = e1000_null_mac_generic;
+ /* LED */
+ mac->ops.cleanup_led = e1000_null_ops_generic;
+ mac->ops.setup_led = e1000_null_ops_generic;
+ mac->ops.blink_led = e1000_null_ops_generic;
+ mac->ops.led_on = e1000_null_ops_generic;
+ mac->ops.led_off = e1000_null_ops_generic;
+ /* LINK */
+ mac->ops.setup_link = e1000_null_ops_generic;
+ mac->ops.get_link_up_info = e1000_null_link_info;
+ mac->ops.check_for_link = e1000_null_ops_generic;
+ /* Management */
+ mac->ops.check_mng_mode = e1000_null_mng_mode;
+ /* VLAN, MC, etc. */
+ mac->ops.update_mc_addr_list = e1000_null_update_mc;
+ mac->ops.clear_vfta = e1000_null_mac_generic;
+ mac->ops.write_vfta = e1000_null_write_vfta;
+ mac->ops.rar_set = e1000_rar_set_generic;
+ mac->ops.validate_mdi_setting = e1000_validate_mdi_setting_generic;
+}
+
+/**
+ * e1000_null_ops_generic - No-op function, returns 0
+ * @hw: pointer to the HW structure
+ **/
+s32 e1000_null_ops_generic(struct e1000_hw E1000_UNUSEDARG *hw)
+{
+ DEBUGFUNC("e1000_null_ops_generic");
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_null_mac_generic - No-op function, return void
+ * @hw: pointer to the HW structure
+ **/
+void e1000_null_mac_generic(struct e1000_hw E1000_UNUSEDARG *hw)
+{
+ DEBUGFUNC("e1000_null_mac_generic");
+ return;
+}
+
+/**
+ * e1000_null_link_info - No-op function, return 0
+ * @hw: pointer to the HW structure
+ **/
+s32 e1000_null_link_info(struct e1000_hw E1000_UNUSEDARG *hw,
+ u16 E1000_UNUSEDARG *s, u16 E1000_UNUSEDARG *d)
+{
+ DEBUGFUNC("e1000_null_link_info");
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_null_mng_mode - No-op function, return false
+ * @hw: pointer to the HW structure
+ **/
+bool e1000_null_mng_mode(struct e1000_hw E1000_UNUSEDARG *hw)
+{
+ DEBUGFUNC("e1000_null_mng_mode");
+ return false;
+}
+
+/**
+ * e1000_null_update_mc - No-op function, return void
+ * @hw: pointer to the HW structure
+ **/
+void e1000_null_update_mc(struct e1000_hw E1000_UNUSEDARG *hw,
+ u8 E1000_UNUSEDARG *h, u32 E1000_UNUSEDARG a)
+{
+ DEBUGFUNC("e1000_null_update_mc");
+ return;
+}
+
+/**
+ * e1000_null_write_vfta - No-op function, return void
+ * @hw: pointer to the HW structure
+ **/
+void e1000_null_write_vfta(struct e1000_hw E1000_UNUSEDARG *hw,
+ u32 E1000_UNUSEDARG a, u32 E1000_UNUSEDARG b)
+{
+ DEBUGFUNC("e1000_null_write_vfta");
+ return;
+}
+
+/**
+ * e1000_null_rar_set - No-op function, return void
+ * @hw: pointer to the HW structure
+ **/
+void e1000_null_rar_set(struct e1000_hw E1000_UNUSEDARG *hw,
+ u8 E1000_UNUSEDARG *h, u32 E1000_UNUSEDARG a)
+{
+ DEBUGFUNC("e1000_null_rar_set");
+ return;
+}
+
+/**
+ * e1000_get_bus_info_pcie_generic - Get PCIe bus information
+ * @hw: pointer to the HW structure
+ *
+ * Determines and stores the system bus information for a particular
+ * network interface. The following bus information is determined and stored:
+ * bus speed, bus width, type (PCIe), and PCIe function.
+ **/
+s32 e1000_get_bus_info_pcie_generic(struct e1000_hw *hw)
+{
+ struct e1000_mac_info *mac = &hw->mac;
+ struct e1000_bus_info *bus = &hw->bus;
+ s32 ret_val;
+ u16 pcie_link_status;
+
+ DEBUGFUNC("e1000_get_bus_info_pcie_generic");
+
+ bus->type = e1000_bus_type_pci_express;
+
+ ret_val = e1000_read_pcie_cap_reg(hw, PCIE_LINK_STATUS,
+ &pcie_link_status);
+ if (ret_val) {
+ bus->width = e1000_bus_width_unknown;
+ bus->speed = e1000_bus_speed_unknown;
+ } else {
+ switch (pcie_link_status & PCIE_LINK_SPEED_MASK) {
+ case PCIE_LINK_SPEED_2500:
+ bus->speed = e1000_bus_speed_2500;
+ break;
+ case PCIE_LINK_SPEED_5000:
+ bus->speed = e1000_bus_speed_5000;
+ break;
+ default:
+ bus->speed = e1000_bus_speed_unknown;
+ break;
+ }
+
+ bus->width = (enum e1000_bus_width)((pcie_link_status &
+ PCIE_LINK_WIDTH_MASK) >> PCIE_LINK_WIDTH_SHIFT);
+ }
+
+ mac->ops.set_lan_id(hw);
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_set_lan_id_multi_port_pcie - Set LAN id for PCIe multiple port devices
+ *
+ * @hw: pointer to the HW structure
+ *
+ * Determines the LAN function id by reading memory-mapped registers
+ * and swaps the port value if requested.
+ **/
+static void e1000_set_lan_id_multi_port_pcie(struct e1000_hw *hw)
+{
+ struct e1000_bus_info *bus = &hw->bus;
+ u32 reg;
+
+ /* The status register reports the correct function number
+ * for the device regardless of function swap state.
+ */
+ reg = E1000_READ_REG(hw, E1000_STATUS);
+ bus->func = (reg & E1000_STATUS_FUNC_MASK) >> E1000_STATUS_FUNC_SHIFT;
+}
+
+/**
+ * e1000_set_lan_id_single_port - Set LAN id for a single port device
+ * @hw: pointer to the HW structure
+ *
+ * Sets the LAN function id to zero for a single port device.
+ **/
+void e1000_set_lan_id_single_port(struct e1000_hw *hw)
+{
+ struct e1000_bus_info *bus = &hw->bus;
+
+ bus->func = 0;
+}
+
+/**
+ * e1000_clear_vfta_generic - Clear VLAN filter table
+ * @hw: pointer to the HW structure
+ *
+ * Clears the register array which contains the VLAN filter table by
+ * setting all the values to 0.
+ **/
+void e1000_clear_vfta_generic(struct e1000_hw *hw)
+{
+ u32 offset;
+
+ DEBUGFUNC("e1000_clear_vfta_generic");
+
+ for (offset = 0; offset < E1000_VLAN_FILTER_TBL_SIZE; offset++) {
+ E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset, 0);
+ E1000_WRITE_FLUSH(hw);
+ }
+}
+
+/**
+ * e1000_write_vfta_generic - Write value to VLAN filter table
+ * @hw: pointer to the HW structure
+ * @offset: register offset in VLAN filter table
+ * @value: register value written to VLAN filter table
+ *
+ * Writes value at the given offset in the register array which stores
+ * the VLAN filter table.
+ **/
+void e1000_write_vfta_generic(struct e1000_hw *hw, u32 offset, u32 value)
+{
+ DEBUGFUNC("e1000_write_vfta_generic");
+
+ E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset, value);
+ E1000_WRITE_FLUSH(hw);
+}
+
+/**
+ * e1000_init_rx_addrs_generic - Initialize receive address's
+ * @hw: pointer to the HW structure
+ * @rar_count: receive address registers
+ *
+ * Setup the receive address registers by setting the base receive address
+ * register to the devices MAC address and clearing all the other receive
+ * address registers to 0.
+ **/
+void e1000_init_rx_addrs_generic(struct e1000_hw *hw, u16 rar_count)
+{
+ u32 i;
+ u8 mac_addr[ETH_ADDR_LEN] = {0};
+
+ DEBUGFUNC("e1000_init_rx_addrs_generic");
+
+ /* Setup the receive address */
+ DEBUGOUT("Programming MAC Address into RAR[0]\n");
+
+ hw->mac.ops.rar_set(hw, hw->mac.addr, 0);
+
+ /* Zero out the other (rar_entry_count - 1) receive addresses */
+ DEBUGOUT1("Clearing RAR[1-%u]\n", rar_count-1);
+ for (i = 1; i < rar_count; i++)
+ hw->mac.ops.rar_set(hw, mac_addr, i);
+}
+
+/**
+ * e1000_check_alt_mac_addr_generic - Check for alternate MAC addr
+ * @hw: pointer to the HW structure
+ *
+ * Checks the nvm for an alternate MAC address. An alternate MAC address
+ * can be setup by pre-boot software and must be treated like a permanent
+ * address and must override the actual permanent MAC address. If an
+ * alternate MAC address is found it is programmed into RAR0, replacing
+ * the permanent address that was installed into RAR0 by the Si on reset.
+ * This function will return SUCCESS unless it encounters an error while
+ * reading the EEPROM.
+ **/
+s32 e1000_check_alt_mac_addr_generic(struct e1000_hw *hw)
+{
+ u32 i;
+ s32 ret_val;
+ u16 offset, nvm_alt_mac_addr_offset, nvm_data;
+ u8 alt_mac_addr[ETH_ADDR_LEN];
+
+ DEBUGFUNC("e1000_check_alt_mac_addr_generic");
+
+ ret_val = hw->nvm.ops.read(hw, NVM_COMPAT, 1, &nvm_data);
+ if (ret_val)
+ return ret_val;
+
+
+ /* Alternate MAC address is handled by the option ROM for 82580
+ * and newer. SW support not required.
+ */
+ if (hw->mac.type >= e1000_82580)
+ return E1000_SUCCESS;
+
+ ret_val = hw->nvm.ops.read(hw, NVM_ALT_MAC_ADDR_PTR, 1,
+ &nvm_alt_mac_addr_offset);
+ if (ret_val) {
+ DEBUGOUT("NVM Read Error\n");
+ return ret_val;
+ }
+
+ if ((nvm_alt_mac_addr_offset == 0xFFFF) ||
+ (nvm_alt_mac_addr_offset == 0x0000))
+ /* There is no Alternate MAC Address */
+ return E1000_SUCCESS;
+
+ if (hw->bus.func == E1000_FUNC_1)
+ nvm_alt_mac_addr_offset += E1000_ALT_MAC_ADDRESS_OFFSET_LAN1;
+ if (hw->bus.func == E1000_FUNC_2)
+ nvm_alt_mac_addr_offset += E1000_ALT_MAC_ADDRESS_OFFSET_LAN2;
+
+ if (hw->bus.func == E1000_FUNC_3)
+ nvm_alt_mac_addr_offset += E1000_ALT_MAC_ADDRESS_OFFSET_LAN3;
+ for (i = 0; i < ETH_ADDR_LEN; i += 2) {
+ offset = nvm_alt_mac_addr_offset + (i >> 1);
+ ret_val = hw->nvm.ops.read(hw, offset, 1, &nvm_data);
+ if (ret_val) {
+ DEBUGOUT("NVM Read Error\n");
+ return ret_val;
+ }
+
+ alt_mac_addr[i] = (u8)(nvm_data & 0xFF);
+ alt_mac_addr[i + 1] = (u8)(nvm_data >> 8);
+ }
+
+ /* if multicast bit is set, the alternate address will not be used */
+ if (alt_mac_addr[0] & 0x01) {
+ DEBUGOUT("Ignoring Alternate Mac Address with MC bit set\n");
+ return E1000_SUCCESS;
+ }
+
+ /* We have a valid alternate MAC address, and we want to treat it the
+ * same as the normal permanent MAC address stored by the HW into the
+ * RAR. Do this by mapping this address into RAR0.
+ */
+ hw->mac.ops.rar_set(hw, alt_mac_addr, 0);
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_rar_set_generic - Set receive address register
+ * @hw: pointer to the HW structure
+ * @addr: pointer to the receive address
+ * @index: receive address array register
+ *
+ * Sets the receive address array register at index to the address passed
+ * in by addr.
+ **/
+static void e1000_rar_set_generic(struct e1000_hw *hw, u8 *addr, u32 index)
+{
+ u32 rar_low, rar_high;
+
+ DEBUGFUNC("e1000_rar_set_generic");
+
+ /* HW expects these in little endian so we reverse the byte order
+ * from network order (big endian) to little endian
+ */
+ rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
+ ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
+
+ rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
+
+ /* If MAC address zero, no need to set the AV bit */
+ if (rar_low || rar_high)
+ rar_high |= E1000_RAH_AV;
+
+ /* Some bridges will combine consecutive 32-bit writes into
+ * a single burst write, which will malfunction on some parts.
+ * The flushes avoid this.
+ */
+ E1000_WRITE_REG(hw, E1000_RAL(index), rar_low);
+ E1000_WRITE_FLUSH(hw);
+ E1000_WRITE_REG(hw, E1000_RAH(index), rar_high);
+ E1000_WRITE_FLUSH(hw);
+}
+
+/**
+ * e1000_hash_mc_addr_generic - Generate a multicast hash value
+ * @hw: pointer to the HW structure
+ * @mc_addr: pointer to a multicast address
+ *
+ * Generates a multicast address hash value which is used to determine
+ * the multicast filter table array address and new table value.
+ **/
+u32 e1000_hash_mc_addr_generic(struct e1000_hw *hw, u8 *mc_addr)
+{
+ u32 hash_value, hash_mask;
+ u8 bit_shift = 0;
+
+ DEBUGFUNC("e1000_hash_mc_addr_generic");
+
+ /* Register count multiplied by bits per register */
+ hash_mask = (hw->mac.mta_reg_count * 32) - 1;
+
+ /* For a mc_filter_type of 0, bit_shift is the number of left-shifts
+ * where 0xFF would still fall within the hash mask.
+ */
+ while (hash_mask >> bit_shift != 0xFF)
+ bit_shift++;
+
+ /* The portion of the address that is used for the hash table
+ * is determined by the mc_filter_type setting.
+ * The algorithm is such that there is a total of 8 bits of shifting.
+ * The bit_shift for a mc_filter_type of 0 represents the number of
+ * left-shifts where the MSB of mc_addr[5] would still fall within
+ * the hash_mask. Case 0 does this exactly. Since there are a total
+ * of 8 bits of shifting, then mc_addr[4] will shift right the
+ * remaining number of bits. Thus 8 - bit_shift. The rest of the
+ * cases are a variation of this algorithm...essentially raising the
+ * number of bits to shift mc_addr[5] left, while still keeping the
+ * 8-bit shifting total.
+ *
+ * For example, given the following Destination MAC Address and an
+ * mta register count of 128 (thus a 4096-bit vector and 0xFFF mask),
+ * we can see that the bit_shift for case 0 is 4. These are the hash
+ * values resulting from each mc_filter_type...
+ * [0] [1] [2] [3] [4] [5]
+ * 01 AA 00 12 34 56
+ * LSB MSB
+ *
+ * case 0: hash_value = ((0x34 >> 4) | (0x56 << 4)) & 0xFFF = 0x563
+ * case 1: hash_value = ((0x34 >> 3) | (0x56 << 5)) & 0xFFF = 0xAC6
+ * case 2: hash_value = ((0x34 >> 2) | (0x56 << 6)) & 0xFFF = 0x163
+ * case 3: hash_value = ((0x34 >> 0) | (0x56 << 8)) & 0xFFF = 0x634
+ */
+ switch (hw->mac.mc_filter_type) {
+ default:
+ case 0:
+ break;
+ case 1:
+ bit_shift += 1;
+ break;
+ case 2:
+ bit_shift += 2;
+ break;
+ case 3:
+ bit_shift += 4;
+ break;
+ }
+
+ hash_value = hash_mask & (((mc_addr[4] >> (8 - bit_shift)) |
+ (((u16) mc_addr[5]) << bit_shift)));
+
+ return hash_value;
+}
+
+/**
+ * e1000_update_mc_addr_list_generic - Update Multicast addresses
+ * @hw: pointer to the HW structure
+ * @mc_addr_list: array of multicast addresses to program
+ * @mc_addr_count: number of multicast addresses to program
+ *
+ * Updates entire Multicast Table Array.
+ * The caller must have a packed mc_addr_list of multicast addresses.
+ **/
+void e1000_update_mc_addr_list_generic(struct e1000_hw *hw,
+ u8 *mc_addr_list, u32 mc_addr_count)
+{
+ u32 hash_value, hash_bit, hash_reg;
+ int i;
+
+ DEBUGFUNC("e1000_update_mc_addr_list_generic");
+
+ /* clear mta_shadow */
+ memset(&hw->mac.mta_shadow, 0, sizeof(hw->mac.mta_shadow));
+
+ /* update mta_shadow from mc_addr_list */
+ for (i = 0; (u32) i < mc_addr_count; i++) {
+ hash_value = e1000_hash_mc_addr_generic(hw, mc_addr_list);
+
+ hash_reg = (hash_value >> 5) & (hw->mac.mta_reg_count - 1);
+ hash_bit = hash_value & 0x1F;
+
+ hw->mac.mta_shadow[hash_reg] |= (1 << hash_bit);
+ mc_addr_list += (ETH_ADDR_LEN);
+ }
+
+ /* replace the entire MTA table */
+ for (i = hw->mac.mta_reg_count - 1; i >= 0; i--)
+ E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, hw->mac.mta_shadow[i]);
+ E1000_WRITE_FLUSH(hw);
+}
+
+/**
+ * e1000_clear_hw_cntrs_base_generic - Clear base hardware counters
+ * @hw: pointer to the HW structure
+ *
+ * Clears the base hardware counters by reading the counter registers.
+ **/
+void e1000_clear_hw_cntrs_base_generic(struct e1000_hw *hw)
+{
+ DEBUGFUNC("e1000_clear_hw_cntrs_base_generic");
+
+ E1000_READ_REG(hw, E1000_CRCERRS);
+ E1000_READ_REG(hw, E1000_SYMERRS);
+ E1000_READ_REG(hw, E1000_MPC);
+ E1000_READ_REG(hw, E1000_SCC);
+ E1000_READ_REG(hw, E1000_ECOL);
+ E1000_READ_REG(hw, E1000_MCC);
+ E1000_READ_REG(hw, E1000_LATECOL);
+ E1000_READ_REG(hw, E1000_COLC);
+ E1000_READ_REG(hw, E1000_DC);
+ E1000_READ_REG(hw, E1000_SEC);
+ E1000_READ_REG(hw, E1000_RLEC);
+ E1000_READ_REG(hw, E1000_XONRXC);
+ E1000_READ_REG(hw, E1000_XONTXC);
+ E1000_READ_REG(hw, E1000_XOFFRXC);
+ E1000_READ_REG(hw, E1000_XOFFTXC);
+ E1000_READ_REG(hw, E1000_FCRUC);
+ E1000_READ_REG(hw, E1000_GPRC);
+ E1000_READ_REG(hw, E1000_BPRC);
+ E1000_READ_REG(hw, E1000_MPRC);
+ E1000_READ_REG(hw, E1000_GPTC);
+ E1000_READ_REG(hw, E1000_GORCL);
+ E1000_READ_REG(hw, E1000_GORCH);
+ E1000_READ_REG(hw, E1000_GOTCL);
+ E1000_READ_REG(hw, E1000_GOTCH);
+ E1000_READ_REG(hw, E1000_RNBC);
+ E1000_READ_REG(hw, E1000_RUC);
+ E1000_READ_REG(hw, E1000_RFC);
+ E1000_READ_REG(hw, E1000_ROC);
+ E1000_READ_REG(hw, E1000_RJC);
+ E1000_READ_REG(hw, E1000_TORL);
+ E1000_READ_REG(hw, E1000_TORH);
+ E1000_READ_REG(hw, E1000_TOTL);
+ E1000_READ_REG(hw, E1000_TOTH);
+ E1000_READ_REG(hw, E1000_TPR);
+ E1000_READ_REG(hw, E1000_TPT);
+ E1000_READ_REG(hw, E1000_MPTC);
+ E1000_READ_REG(hw, E1000_BPTC);
+}
+
+/**
+ * e1000_check_for_copper_link_generic - Check for link (Copper)
+ * @hw: pointer to the HW structure
+ *
+ * Checks to see of the link status of the hardware has changed. If a
+ * change in link status has been detected, then we read the PHY registers
+ * to get the current speed/duplex if link exists.
+ **/
+s32 e1000_check_for_copper_link_generic(struct e1000_hw *hw)
+{
+ struct e1000_mac_info *mac = &hw->mac;
+ s32 ret_val;
+ bool link;
+
+ DEBUGFUNC("e1000_check_for_copper_link");
+
+ /* We only want to go out to the PHY registers to see if Auto-Neg
+ * has completed and/or if our link status has changed. The
+ * get_link_status flag is set upon receiving a Link Status
+ * Change or Rx Sequence Error interrupt.
+ */
+ if (!mac->get_link_status)
+ return E1000_SUCCESS;
+
+ /* First we want to see if the MII Status Register reports
+ * link. If so, then we want to get the current speed/duplex
+ * of the PHY.
+ */
+ ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link);
+ if (ret_val)
+ return ret_val;
+
+ if (!link)
+ return E1000_SUCCESS; /* No link detected */
+
+ mac->get_link_status = false;
+
+ /* Check if there was DownShift, must be checked
+ * immediately after link-up
+ */
+ e1000_check_downshift_generic(hw);
+
+ /* If we are forcing speed/duplex, then we simply return since
+ * we have already determined whether we have link or not.
+ */
+ if (!mac->autoneg)
+ return -E1000_ERR_CONFIG;
+
+ /* Auto-Neg is enabled. Auto Speed Detection takes care
+ * of MAC speed/duplex configuration. So we only need to
+ * configure Collision Distance in the MAC.
+ */
+ mac->ops.config_collision_dist(hw);
+
+ /* Configure Flow Control now that Auto-Neg has completed.
+ * First, we need to restore the desired flow control
+ * settings because we may have had to re-autoneg with a
+ * different link partner.
+ */
+ ret_val = e1000_config_fc_after_link_up_generic(hw);
+ if (ret_val)
+ DEBUGOUT("Error configuring flow control\n");
+
+ return ret_val;
+}
+
+/**
+ * e1000_check_for_fiber_link_generic - Check for link (Fiber)
+ * @hw: pointer to the HW structure
+ *
+ * Checks for link up on the hardware. If link is not up and we have
+ * a signal, then we need to force link up.
+ **/
+s32 e1000_check_for_fiber_link_generic(struct e1000_hw *hw)
+{
+ struct e1000_mac_info *mac = &hw->mac;
+ u32 rxcw;
+ u32 ctrl;
+ u32 status;
+ s32 ret_val;
+
+ DEBUGFUNC("e1000_check_for_fiber_link_generic");
+
+ ctrl = E1000_READ_REG(hw, E1000_CTRL);
+ status = E1000_READ_REG(hw, E1000_STATUS);
+ rxcw = E1000_READ_REG(hw, E1000_RXCW);
+
+ /* If we don't have link (auto-negotiation failed or link partner
+ * cannot auto-negotiate), the cable is plugged in (we have signal),
+ * and our link partner is not trying to auto-negotiate with us (we
+ * are receiving idles or data), we need to force link up. We also
+ * need to give auto-negotiation time to complete, in case the cable
+ * was just plugged in. The autoneg_failed flag does this.
+ */
+ /* (ctrl & E1000_CTRL_SWDPIN1) == 1 == have signal */
+ if ((ctrl & E1000_CTRL_SWDPIN1) && !(status & E1000_STATUS_LU) &&
+ !(rxcw & E1000_RXCW_C)) {
+ if (!mac->autoneg_failed) {
+ mac->autoneg_failed = true;
+ return E1000_SUCCESS;
+ }
+ DEBUGOUT("NOT Rx'ing /C/, disable AutoNeg and force link.\n");
+
+ /* Disable auto-negotiation in the TXCW register */
+ E1000_WRITE_REG(hw, E1000_TXCW, (mac->txcw & ~E1000_TXCW_ANE));
+
+ /* Force link-up and also force full-duplex. */
+ ctrl = E1000_READ_REG(hw, E1000_CTRL);
+ ctrl |= (E1000_CTRL_SLU | E1000_CTRL_FD);
+ E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
+
+ /* Configure Flow Control after forcing link up. */
+ ret_val = e1000_config_fc_after_link_up_generic(hw);
+ if (ret_val) {
+ DEBUGOUT("Error configuring flow control\n");
+ return ret_val;
+ }
+ } else if ((ctrl & E1000_CTRL_SLU) && (rxcw & E1000_RXCW_C)) {
+ /* If we are forcing link and we are receiving /C/ ordered
+ * sets, re-enable auto-negotiation in the TXCW register
+ * and disable forced link in the Device Control register
+ * in an attempt to auto-negotiate with our link partner.
+ */
+ DEBUGOUT("Rx'ing /C/, enable AutoNeg and stop forcing link.\n");
+ E1000_WRITE_REG(hw, E1000_TXCW, mac->txcw);
+ E1000_WRITE_REG(hw, E1000_CTRL, (ctrl & ~E1000_CTRL_SLU));
+
+ mac->serdes_has_link = true;
+ }
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_check_for_serdes_link_generic - Check for link (Serdes)
+ * @hw: pointer to the HW structure
+ *
+ * Checks for link up on the hardware. If link is not up and we have
+ * a signal, then we need to force link up.
+ **/
+s32 e1000_check_for_serdes_link_generic(struct e1000_hw *hw)
+{
+ struct e1000_mac_info *mac = &hw->mac;
+ u32 rxcw;
+ u32 ctrl;
+ u32 status;
+ s32 ret_val;
+
+ DEBUGFUNC("e1000_check_for_serdes_link_generic");
+
+ ctrl = E1000_READ_REG(hw, E1000_CTRL);
+ status = E1000_READ_REG(hw, E1000_STATUS);
+ rxcw = E1000_READ_REG(hw, E1000_RXCW);
+
+ /* If we don't have link (auto-negotiation failed or link partner
+ * cannot auto-negotiate), and our link partner is not trying to
+ * auto-negotiate with us (we are receiving idles or data),
+ * we need to force link up. We also need to give auto-negotiation
+ * time to complete.
+ */
+ /* (ctrl & E1000_CTRL_SWDPIN1) == 1 == have signal */
+ if (!(status & E1000_STATUS_LU) && !(rxcw & E1000_RXCW_C)) {
+ if (!mac->autoneg_failed) {
+ mac->autoneg_failed = true;
+ return E1000_SUCCESS;
+ }
+ DEBUGOUT("NOT Rx'ing /C/, disable AutoNeg and force link.\n");
+
+ /* Disable auto-negotiation in the TXCW register */
+ E1000_WRITE_REG(hw, E1000_TXCW, (mac->txcw & ~E1000_TXCW_ANE));
+
+ /* Force link-up and also force full-duplex. */
+ ctrl = E1000_READ_REG(hw, E1000_CTRL);
+ ctrl |= (E1000_CTRL_SLU | E1000_CTRL_FD);
+ E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
+
+ /* Configure Flow Control after forcing link up. */
+ ret_val = e1000_config_fc_after_link_up_generic(hw);
+ if (ret_val) {
+ DEBUGOUT("Error configuring flow control\n");
+ return ret_val;
+ }
+ } else if ((ctrl & E1000_CTRL_SLU) && (rxcw & E1000_RXCW_C)) {
+ /* If we are forcing link and we are receiving /C/ ordered
+ * sets, re-enable auto-negotiation in the TXCW register
+ * and disable forced link in the Device Control register
+ * in an attempt to auto-negotiate with our link partner.
+ */
+ DEBUGOUT("Rx'ing /C/, enable AutoNeg and stop forcing link.\n");
+ E1000_WRITE_REG(hw, E1000_TXCW, mac->txcw);
+ E1000_WRITE_REG(hw, E1000_CTRL, (ctrl & ~E1000_CTRL_SLU));
+
+ mac->serdes_has_link = true;
+ } else if (!(E1000_TXCW_ANE & E1000_READ_REG(hw, E1000_TXCW))) {
+ /* If we force link for non-auto-negotiation switch, check
+ * link status based on MAC synchronization for internal
+ * serdes media type.
+ */
+ /* SYNCH bit and IV bit are sticky. */
+ usec_delay(10);
+ rxcw = E1000_READ_REG(hw, E1000_RXCW);
+ if (rxcw & E1000_RXCW_SYNCH) {
+ if (!(rxcw & E1000_RXCW_IV)) {
+ mac->serdes_has_link = true;
+ DEBUGOUT("SERDES: Link up - forced.\n");
+ }
+ } else {
+ mac->serdes_has_link = false;
+ DEBUGOUT("SERDES: Link down - force failed.\n");
+ }
+ }
+
+ if (E1000_TXCW_ANE & E1000_READ_REG(hw, E1000_TXCW)) {
+ status = E1000_READ_REG(hw, E1000_STATUS);
+ if (status & E1000_STATUS_LU) {
+ /* SYNCH bit and IV bit are sticky, so reread rxcw. */
+ usec_delay(10);
+ rxcw = E1000_READ_REG(hw, E1000_RXCW);
+ if (rxcw & E1000_RXCW_SYNCH) {
+ if (!(rxcw & E1000_RXCW_IV)) {
+ mac->serdes_has_link = true;
+ DEBUGOUT("SERDES: Link up - autoneg completed successfully.\n");
+ } else {
+ mac->serdes_has_link = false;
+ DEBUGOUT("SERDES: Link down - invalid codewords detected in autoneg.\n");
+ }
+ } else {
+ mac->serdes_has_link = false;
+ DEBUGOUT("SERDES: Link down - no sync.\n");
+ }
+ } else {
+ mac->serdes_has_link = false;
+ DEBUGOUT("SERDES: Link down - autoneg failed\n");
+ }
+ }
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_set_default_fc_generic - Set flow control default values
+ * @hw: pointer to the HW structure
+ *
+ * Read the EEPROM for the default values for flow control and store the
+ * values.
+ **/
+static s32 e1000_set_default_fc_generic(struct e1000_hw *hw)
+{
+ s32 ret_val;
+ u16 nvm_data;
+
+ DEBUGFUNC("e1000_set_default_fc_generic");
+
+ /* Read and store word 0x0F of the EEPROM. This word contains bits
+ * that determine the hardware's default PAUSE (flow control) mode,
+ * a bit that determines whether the HW defaults to enabling or
+ * disabling auto-negotiation, and the direction of the
+ * SW defined pins. If there is no SW over-ride of the flow
+ * control setting, then the variable hw->fc will
+ * be initialized based on a value in the EEPROM.
+ */
+ ret_val = hw->nvm.ops.read(hw, NVM_INIT_CONTROL2_REG, 1, &nvm_data);
+
+ if (ret_val) {
+ DEBUGOUT("NVM Read Error\n");
+ return ret_val;
+ }
+
+ if (!(nvm_data & NVM_WORD0F_PAUSE_MASK))
+ hw->fc.requested_mode = e1000_fc_none;
+ else if ((nvm_data & NVM_WORD0F_PAUSE_MASK) ==
+ NVM_WORD0F_ASM_DIR)
+ hw->fc.requested_mode = e1000_fc_tx_pause;
+ else
+ hw->fc.requested_mode = e1000_fc_full;
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_setup_link_generic - Setup flow control and link settings
+ * @hw: pointer to the HW structure
+ *
+ * Determines which flow control settings to use, then configures flow
+ * control. Calls the appropriate media-specific link configuration
+ * function. Assuming the adapter has a valid link partner, a valid link
+ * should be established. Assumes the hardware has previously been reset
+ * and the transmitter and receiver are not enabled.
+ **/
+s32 e1000_setup_link_generic(struct e1000_hw *hw)
+{
+ s32 ret_val;
+
+ DEBUGFUNC("e1000_setup_link_generic");
+
+ /* In the case of the phy reset being blocked, we already have a link.
+ * We do not need to set it up again.
+ */
+ if (hw->phy.ops.check_reset_block && hw->phy.ops.check_reset_block(hw))
+ return E1000_SUCCESS;
+
+ /* If requested flow control is set to default, set flow control
+ * based on the EEPROM flow control settings.
+ */
+ if (hw->fc.requested_mode == e1000_fc_default) {
+ ret_val = e1000_set_default_fc_generic(hw);
+ if (ret_val)
+ return ret_val;
+ }
+
+ /* Save off the requested flow control mode for use later. Depending
+ * on the link partner's capabilities, we may or may not use this mode.
+ */
+ hw->fc.current_mode = hw->fc.requested_mode;
+
+ DEBUGOUT1("After fix-ups FlowControl is now = %x\n",
+ hw->fc.current_mode);
+
+ /* Call the necessary media_type subroutine to configure the link. */
+ ret_val = hw->mac.ops.setup_physical_interface(hw);
+ if (ret_val)
+ return ret_val;
+
+ /* Initialize the flow control address, type, and PAUSE timer
+ * registers to their default values. This is done even if flow
+ * control is disabled, because it does not hurt anything to
+ * initialize these registers.
+ */
+ DEBUGOUT("Initializing the Flow Control address, type and timer regs\n");
+ E1000_WRITE_REG(hw, E1000_FCT, FLOW_CONTROL_TYPE);
+ E1000_WRITE_REG(hw, E1000_FCAH, FLOW_CONTROL_ADDRESS_HIGH);
+ E1000_WRITE_REG(hw, E1000_FCAL, FLOW_CONTROL_ADDRESS_LOW);
+
+ E1000_WRITE_REG(hw, E1000_FCTTV, hw->fc.pause_time);
+
+ return e1000_set_fc_watermarks_generic(hw);
+}
+
+/**
+ * e1000_commit_fc_settings_generic - Configure flow control
+ * @hw: pointer to the HW structure
+ *
+ * Write the flow control settings to the Transmit Config Word Register (TXCW)
+ * base on the flow control settings in e1000_mac_info.
+ **/
+static s32 e1000_commit_fc_settings_generic(struct e1000_hw *hw)
+{
+ struct e1000_mac_info *mac = &hw->mac;
+ u32 txcw;
+
+ DEBUGFUNC("e1000_commit_fc_settings_generic");
+
+ /* Check for a software override of the flow control settings, and
+ * setup the device accordingly. If auto-negotiation is enabled, then
+ * software will have to set the "PAUSE" bits to the correct value in
+ * the Transmit Config Word Register (TXCW) and re-start auto-
+ * negotiation. However, if auto-negotiation is disabled, then
+ * software will have to manually configure the two flow control enable
+ * bits in the CTRL register.
+ *
+ * The possible values of the "fc" parameter are:
+ * 0: Flow control is completely disabled
+ * 1: Rx flow control is enabled (we can receive pause frames,
+ * but not send pause frames).
+ * 2: Tx flow control is enabled (we can send pause frames but we
+ * do not support receiving pause frames).
+ * 3: Both Rx and Tx flow control (symmetric) are enabled.
+ */
+ switch (hw->fc.current_mode) {
+ case e1000_fc_none:
+ /* Flow control completely disabled by a software over-ride. */
+ txcw = (E1000_TXCW_ANE | E1000_TXCW_FD);
+ break;
+ case e1000_fc_rx_pause:
+ /* Rx Flow control is enabled and Tx Flow control is disabled
+ * by a software over-ride. Since there really isn't a way to
+ * advertise that we are capable of Rx Pause ONLY, we will
+ * advertise that we support both symmetric and asymmetric Rx
+ * PAUSE. Later, we will disable the adapter's ability to send
+ * PAUSE frames.
+ */
+ txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_PAUSE_MASK);
+ break;
+ case e1000_fc_tx_pause:
+ /* Tx Flow control is enabled, and Rx Flow control is disabled,
+ * by a software over-ride.
+ */
+ txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_ASM_DIR);
+ break;
+ case e1000_fc_full:
+ /* Flow control (both Rx and Tx) is enabled by a software
+ * over-ride.
+ */
+ txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_PAUSE_MASK);
+ break;
+ default:
+ DEBUGOUT("Flow control param set incorrectly\n");
+ return -E1000_ERR_CONFIG;
+ break;
+ }
+
+ E1000_WRITE_REG(hw, E1000_TXCW, txcw);
+ mac->txcw = txcw;
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_poll_fiber_serdes_link_generic - Poll for link up
+ * @hw: pointer to the HW structure
+ *
+ * Polls for link up by reading the status register, if link fails to come
+ * up with auto-negotiation, then the link is forced if a signal is detected.
+ **/
+static s32 e1000_poll_fiber_serdes_link_generic(struct e1000_hw *hw)
+{
+ struct e1000_mac_info *mac = &hw->mac;
+ u32 i, status;
+ s32 ret_val;
+
+ DEBUGFUNC("e1000_poll_fiber_serdes_link_generic");
+
+ /* If we have a signal (the cable is plugged in, or assumed true for
+ * serdes media) then poll for a "Link-Up" indication in the Device
+ * Status Register. Time-out if a link isn't seen in 500 milliseconds
+ * seconds (Auto-negotiation should complete in less than 500
+ * milliseconds even if the other end is doing it in SW).
+ */
+ for (i = 0; i < FIBER_LINK_UP_LIMIT; i++) {
+ msec_delay(10);
+ status = E1000_READ_REG(hw, E1000_STATUS);
+ if (status & E1000_STATUS_LU)
+ break;
+ }
+ if (i == FIBER_LINK_UP_LIMIT) {
+ DEBUGOUT("Never got a valid link from auto-neg!!!\n");
+ mac->autoneg_failed = true;
+ /* AutoNeg failed to achieve a link, so we'll call
+ * mac->check_for_link. This routine will force the
+ * link up if we detect a signal. This will allow us to
+ * communicate with non-autonegotiating link partners.
+ */
+ ret_val = mac->ops.check_for_link(hw);
+ if (ret_val) {
+ DEBUGOUT("Error while checking for link\n");
+ return ret_val;
+ }
+ mac->autoneg_failed = false;
+ } else {
+ mac->autoneg_failed = false;
+ DEBUGOUT("Valid Link Found\n");
+ }
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_setup_fiber_serdes_link_generic - Setup link for fiber/serdes
+ * @hw: pointer to the HW structure
+ *
+ * Configures collision distance and flow control for fiber and serdes
+ * links. Upon successful setup, poll for link.
+ **/
+s32 e1000_setup_fiber_serdes_link_generic(struct e1000_hw *hw)
+{
+ u32 ctrl;
+ s32 ret_val;
+
+ DEBUGFUNC("e1000_setup_fiber_serdes_link_generic");
+
+ ctrl = E1000_READ_REG(hw, E1000_CTRL);
+
+ /* Take the link out of reset */
+ ctrl &= ~E1000_CTRL_LRST;
+
+ hw->mac.ops.config_collision_dist(hw);
+
+ ret_val = e1000_commit_fc_settings_generic(hw);
+ if (ret_val)
+ return ret_val;
+
+ /* Since auto-negotiation is enabled, take the link out of reset (the
+ * link will be in reset, because we previously reset the chip). This
+ * will restart auto-negotiation. If auto-negotiation is successful
+ * then the link-up status bit will be set and the flow control enable
+ * bits (RFCE and TFCE) will be set according to their negotiated value.
+ */
+ DEBUGOUT("Auto-negotiation enabled\n");
+
+ E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
+ E1000_WRITE_FLUSH(hw);
+ msec_delay(1);
+
+ /* For these adapters, the SW definable pin 1 is set when the optics
+ * detect a signal. If we have a signal, then poll for a "Link-Up"
+ * indication.
+ */
+ if (hw->phy.media_type == e1000_media_type_internal_serdes ||
+ (E1000_READ_REG(hw, E1000_CTRL) & E1000_CTRL_SWDPIN1)) {
+ ret_val = e1000_poll_fiber_serdes_link_generic(hw);
+ } else {
+ DEBUGOUT("No signal detected\n");
+ }
+
+ return ret_val;
+}
+
+/**
+ * e1000_config_collision_dist_generic - Configure collision distance
+ * @hw: pointer to the HW structure
+ *
+ * Configures the collision distance to the default value and is used
+ * during link setup.
+ **/
+static void e1000_config_collision_dist_generic(struct e1000_hw *hw)
+{
+ u32 tctl;
+
+ DEBUGFUNC("e1000_config_collision_dist_generic");
+
+ tctl = E1000_READ_REG(hw, E1000_TCTL);
+
+ tctl &= ~E1000_TCTL_COLD;
+ tctl |= E1000_COLLISION_DISTANCE << E1000_COLD_SHIFT;
+
+ E1000_WRITE_REG(hw, E1000_TCTL, tctl);
+ E1000_WRITE_FLUSH(hw);
+}
+
+/**
+ * e1000_set_fc_watermarks_generic - Set flow control high/low watermarks
+ * @hw: pointer to the HW structure
+ *
+ * Sets the flow control high/low threshold (watermark) registers. If
+ * flow control XON frame transmission is enabled, then set XON frame
+ * transmission as well.
+ **/
+s32 e1000_set_fc_watermarks_generic(struct e1000_hw *hw)
+{
+ u32 fcrtl = 0, fcrth = 0;
+
+ DEBUGFUNC("e1000_set_fc_watermarks_generic");
+
+ /* Set the flow control receive threshold registers. Normally,
+ * these registers will be set to a default threshold that may be
+ * adjusted later by the driver's runtime code. However, if the
+ * ability to transmit pause frames is not enabled, then these
+ * registers will be set to 0.
+ */
+ if (hw->fc.current_mode & e1000_fc_tx_pause) {
+ /* We need to set up the Receive Threshold high and low water
+ * marks as well as (optionally) enabling the transmission of
+ * XON frames.
+ */
+ fcrtl = hw->fc.low_water;
+ if (hw->fc.send_xon)
+ fcrtl |= E1000_FCRTL_XONE;
+
+ fcrth = hw->fc.high_water;
+ }
+ E1000_WRITE_REG(hw, E1000_FCRTL, fcrtl);
+ E1000_WRITE_REG(hw, E1000_FCRTH, fcrth);
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_force_mac_fc_generic - Force the MAC's flow control settings
+ * @hw: pointer to the HW structure
+ *
+ * Force the MAC's flow control settings. Sets the TFCE and RFCE bits in the
+ * device control register to reflect the adapter settings. TFCE and RFCE
+ * need to be explicitly set by software when a copper PHY is used because
+ * autonegotiation is managed by the PHY rather than the MAC. Software must
+ * also configure these bits when link is forced on a fiber connection.
+ **/
+s32 e1000_force_mac_fc_generic(struct e1000_hw *hw)
+{
+ u32 ctrl;
+
+ DEBUGFUNC("e1000_force_mac_fc_generic");
+
+ ctrl = E1000_READ_REG(hw, E1000_CTRL);
+
+ /* Because we didn't get link via the internal auto-negotiation
+ * mechanism (we either forced link or we got link via PHY
+ * auto-neg), we have to manually enable/disable transmit an
+ * receive flow control.
+ *
+ * The "Case" statement below enables/disable flow control
+ * according to the "hw->fc.current_mode" parameter.
+ *
+ * The possible values of the "fc" parameter are:
+ * 0: Flow control is completely disabled
+ * 1: Rx flow control is enabled (we can receive pause
+ * frames but not send pause frames).
+ * 2: Tx flow control is enabled (we can send pause frames
+ * frames but we do not receive pause frames).
+ * 3: Both Rx and Tx flow control (symmetric) is enabled.
+ * other: No other values should be possible at this point.
+ */
+ DEBUGOUT1("hw->fc.current_mode = %u\n", hw->fc.current_mode);
+
+ switch (hw->fc.current_mode) {
+ case e1000_fc_none:
+ ctrl &= (~(E1000_CTRL_TFCE | E1000_CTRL_RFCE));
+ break;
+ case e1000_fc_rx_pause:
+ ctrl &= (~E1000_CTRL_TFCE);
+ ctrl |= E1000_CTRL_RFCE;
+ break;
+ case e1000_fc_tx_pause:
+ ctrl &= (~E1000_CTRL_RFCE);
+ ctrl |= E1000_CTRL_TFCE;
+ break;
+ case e1000_fc_full:
+ ctrl |= (E1000_CTRL_TFCE | E1000_CTRL_RFCE);
+ break;
+ default:
+ DEBUGOUT("Flow control param set incorrectly\n");
+ return -E1000_ERR_CONFIG;
+ }
+
+ E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_config_fc_after_link_up_generic - Configures flow control after link
+ * @hw: pointer to the HW structure
+ *
+ * Checks the status of auto-negotiation after link up to ensure that the
+ * speed and duplex were not forced. If the link needed to be forced, then
+ * flow control needs to be forced also. If auto-negotiation is enabled
+ * and did not fail, then we configure flow control based on our link
+ * partner.
+ **/
+s32 e1000_config_fc_after_link_up_generic(struct e1000_hw *hw)
+{
+ struct e1000_mac_info *mac = &hw->mac;
+ s32 ret_val = E1000_SUCCESS;
+ u32 pcs_status_reg, pcs_adv_reg, pcs_lp_ability_reg, pcs_ctrl_reg;
+ u16 mii_status_reg, mii_nway_adv_reg, mii_nway_lp_ability_reg;
+ u16 speed, duplex;
+
+ DEBUGFUNC("e1000_config_fc_after_link_up_generic");
+
+ /* Check for the case where we have fiber media and auto-neg failed
+ * so we had to force link. In this case, we need to force the
+ * configuration of the MAC to match the "fc" parameter.
+ */
+ if (mac->autoneg_failed) {
+ if (hw->phy.media_type == e1000_media_type_fiber ||
+ hw->phy.media_type == e1000_media_type_internal_serdes)
+ ret_val = e1000_force_mac_fc_generic(hw);
+ } else {
+ if (hw->phy.media_type == e1000_media_type_copper)
+ ret_val = e1000_force_mac_fc_generic(hw);
+ }
+
+ if (ret_val) {
+ DEBUGOUT("Error forcing flow control settings\n");
+ return ret_val;
+ }
+
+ /* Check for the case where we have copper media and auto-neg is
+ * enabled. In this case, we need to check and see if Auto-Neg
+ * has completed, and if so, how the PHY and link partner has
+ * flow control configured.
+ */
+ if ((hw->phy.media_type == e1000_media_type_copper) && mac->autoneg) {
+ /* Read the MII Status Register and check to see if AutoNeg
+ * has completed. We read this twice because this reg has
+ * some "sticky" (latched) bits.
+ */
+ ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &mii_status_reg);
+ if (ret_val)
+ return ret_val;
+ ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &mii_status_reg);
+ if (ret_val)
+ return ret_val;
+
+ if (!(mii_status_reg & MII_SR_AUTONEG_COMPLETE)) {
+ DEBUGOUT("Copper PHY and Auto Neg has not completed.\n");
+ return ret_val;
+ }
+
+ /* The AutoNeg process has completed, so we now need to
+ * read both the Auto Negotiation Advertisement
+ * Register (Address 4) and the Auto_Negotiation Base
+ * Page Ability Register (Address 5) to determine how
+ * flow control was negotiated.
+ */
+ ret_val = hw->phy.ops.read_reg(hw, PHY_AUTONEG_ADV,
+ &mii_nway_adv_reg);
+ if (ret_val)
+ return ret_val;
+ ret_val = hw->phy.ops.read_reg(hw, PHY_LP_ABILITY,
+ &mii_nway_lp_ability_reg);
+ if (ret_val)
+ return ret_val;
+
+ /* Two bits in the Auto Negotiation Advertisement Register
+ * (Address 4) and two bits in the Auto Negotiation Base
+ * Page Ability Register (Address 5) determine flow control
+ * for both the PHY and the link partner. The following
+ * table, taken out of the IEEE 802.3ab/D6.0 dated March 25,
+ * 1999, describes these PAUSE resolution bits and how flow
+ * control is determined based upon these settings.
+ * NOTE: DC = Don't Care
+ *
+ * LOCAL DEVICE | LINK PARTNER
+ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | NIC Resolution
+ *-------|---------|-------|---------|--------------------
+ * 0 | 0 | DC | DC | e1000_fc_none
+ * 0 | 1 | 0 | DC | e1000_fc_none
+ * 0 | 1 | 1 | 0 | e1000_fc_none
+ * 0 | 1 | 1 | 1 | e1000_fc_tx_pause
+ * 1 | 0 | 0 | DC | e1000_fc_none
+ * 1 | DC | 1 | DC | e1000_fc_full
+ * 1 | 1 | 0 | 0 | e1000_fc_none
+ * 1 | 1 | 0 | 1 | e1000_fc_rx_pause
+ *
+ * Are both PAUSE bits set to 1? If so, this implies
+ * Symmetric Flow Control is enabled at both ends. The
+ * ASM_DIR bits are irrelevant per the spec.
+ *
+ * For Symmetric Flow Control:
+ *
+ * LOCAL DEVICE | LINK PARTNER
+ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result
+ *-------|---------|-------|---------|--------------------
+ * 1 | DC | 1 | DC | E1000_fc_full
+ *
+ */
+ if ((mii_nway_adv_reg & NWAY_AR_PAUSE) &&
+ (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE)) {
+ /* Now we need to check if the user selected Rx ONLY
+ * of pause frames. In this case, we had to advertise
+ * FULL flow control because we could not advertise Rx
+ * ONLY. Hence, we must now check to see if we need to
+ * turn OFF the TRANSMISSION of PAUSE frames.
+ */
+ if (hw->fc.requested_mode == e1000_fc_full) {
+ hw->fc.current_mode = e1000_fc_full;
+ DEBUGOUT("Flow Control = FULL.\n");
+ } else {
+ hw->fc.current_mode = e1000_fc_rx_pause;
+ DEBUGOUT("Flow Control = Rx PAUSE frames only.\n");
+ }
+ }
+ /* For receiving PAUSE frames ONLY.
+ *
+ * LOCAL DEVICE | LINK PARTNER
+ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result
+ *-------|---------|-------|---------|--------------------
+ * 0 | 1 | 1 | 1 | e1000_fc_tx_pause
+ */
+ else if (!(mii_nway_adv_reg & NWAY_AR_PAUSE) &&
+ (mii_nway_adv_reg & NWAY_AR_ASM_DIR) &&
+ (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) &&
+ (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) {
+ hw->fc.current_mode = e1000_fc_tx_pause;
+ DEBUGOUT("Flow Control = Tx PAUSE frames only.\n");
+ }
+ /* For transmitting PAUSE frames ONLY.
+ *
+ * LOCAL DEVICE | LINK PARTNER
+ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result
+ *-------|---------|-------|---------|--------------------
+ * 1 | 1 | 0 | 1 | e1000_fc_rx_pause
+ */
+ else if ((mii_nway_adv_reg & NWAY_AR_PAUSE) &&
+ (mii_nway_adv_reg & NWAY_AR_ASM_DIR) &&
+ !(mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) &&
+ (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) {
+ hw->fc.current_mode = e1000_fc_rx_pause;
+ DEBUGOUT("Flow Control = Rx PAUSE frames only.\n");
+ } else {
+ /* Per the IEEE spec, at this point flow control
+ * should be disabled.
+ */
+ hw->fc.current_mode = e1000_fc_none;
+ DEBUGOUT("Flow Control = NONE.\n");
+ }
+
+ /* Now we need to do one last check... If we auto-
+ * negotiated to HALF DUPLEX, flow control should not be
+ * enabled per IEEE 802.3 spec.
+ */
+ ret_val = mac->ops.get_link_up_info(hw, &speed, &duplex);
+ if (ret_val) {
+ DEBUGOUT("Error getting link speed and duplex\n");
+ return ret_val;
+ }
+
+ if (duplex == HALF_DUPLEX)
+ hw->fc.current_mode = e1000_fc_none;
+
+ /* Now we call a subroutine to actually force the MAC
+ * controller to use the correct flow control settings.
+ */
+ ret_val = e1000_force_mac_fc_generic(hw);
+ if (ret_val) {
+ DEBUGOUT("Error forcing flow control settings\n");
+ return ret_val;
+ }
+ }
+
+ /* Check for the case where we have SerDes media and auto-neg is
+ * enabled. In this case, we need to check and see if Auto-Neg
+ * has completed, and if so, how the PHY and link partner has
+ * flow control configured.
+ */
+ if ((hw->phy.media_type == e1000_media_type_internal_serdes) &&
+ mac->autoneg) {
+ /* Read the PCS_LSTS and check to see if AutoNeg
+ * has completed.
+ */
+ pcs_status_reg = E1000_READ_REG(hw, E1000_PCS_LSTAT);
+
+ if (!(pcs_status_reg & E1000_PCS_LSTS_AN_COMPLETE)) {
+ DEBUGOUT("PCS Auto Neg has not completed.\n");
+ return ret_val;
+ }
+
+ /* The AutoNeg process has completed, so we now need to
+ * read both the Auto Negotiation Advertisement
+ * Register (PCS_ANADV) and the Auto_Negotiation Base
+ * Page Ability Register (PCS_LPAB) to determine how
+ * flow control was negotiated.
+ */
+ pcs_adv_reg = E1000_READ_REG(hw, E1000_PCS_ANADV);
+ pcs_lp_ability_reg = E1000_READ_REG(hw, E1000_PCS_LPAB);
+
+ /* Two bits in the Auto Negotiation Advertisement Register
+ * (PCS_ANADV) and two bits in the Auto Negotiation Base
+ * Page Ability Register (PCS_LPAB) determine flow control
+ * for both the PHY and the link partner. The following
+ * table, taken out of the IEEE 802.3ab/D6.0 dated March 25,
+ * 1999, describes these PAUSE resolution bits and how flow
+ * control is determined based upon these settings.
+ * NOTE: DC = Don't Care
+ *
+ * LOCAL DEVICE | LINK PARTNER
+ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | NIC Resolution
+ *-------|---------|-------|---------|--------------------
+ * 0 | 0 | DC | DC | e1000_fc_none
+ * 0 | 1 | 0 | DC | e1000_fc_none
+ * 0 | 1 | 1 | 0 | e1000_fc_none
+ * 0 | 1 | 1 | 1 | e1000_fc_tx_pause
+ * 1 | 0 | 0 | DC | e1000_fc_none
+ * 1 | DC | 1 | DC | e1000_fc_full
+ * 1 | 1 | 0 | 0 | e1000_fc_none
+ * 1 | 1 | 0 | 1 | e1000_fc_rx_pause
+ *
+ * Are both PAUSE bits set to 1? If so, this implies
+ * Symmetric Flow Control is enabled at both ends. The
+ * ASM_DIR bits are irrelevant per the spec.
+ *
+ * For Symmetric Flow Control:
+ *
+ * LOCAL DEVICE | LINK PARTNER
+ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result
+ *-------|---------|-------|---------|--------------------
+ * 1 | DC | 1 | DC | e1000_fc_full
+ *
+ */
+ if ((pcs_adv_reg & E1000_TXCW_PAUSE) &&
+ (pcs_lp_ability_reg & E1000_TXCW_PAUSE)) {
+ /* Now we need to check if the user selected Rx ONLY
+ * of pause frames. In this case, we had to advertise
+ * FULL flow control because we could not advertise Rx
+ * ONLY. Hence, we must now check to see if we need to
+ * turn OFF the TRANSMISSION of PAUSE frames.
+ */
+ if (hw->fc.requested_mode == e1000_fc_full) {
+ hw->fc.current_mode = e1000_fc_full;
+ DEBUGOUT("Flow Control = FULL.\n");
+ } else {
+ hw->fc.current_mode = e1000_fc_rx_pause;
+ DEBUGOUT("Flow Control = Rx PAUSE frames only.\n");
+ }
+ }
+ /* For receiving PAUSE frames ONLY.
+ *
+ * LOCAL DEVICE | LINK PARTNER
+ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result
+ *-------|---------|-------|---------|--------------------
+ * 0 | 1 | 1 | 1 | e1000_fc_tx_pause
+ */
+ else if (!(pcs_adv_reg & E1000_TXCW_PAUSE) &&
+ (pcs_adv_reg & E1000_TXCW_ASM_DIR) &&
+ (pcs_lp_ability_reg & E1000_TXCW_PAUSE) &&
+ (pcs_lp_ability_reg & E1000_TXCW_ASM_DIR)) {
+ hw->fc.current_mode = e1000_fc_tx_pause;
+ DEBUGOUT("Flow Control = Tx PAUSE frames only.\n");
+ }
+ /* For transmitting PAUSE frames ONLY.
+ *
+ * LOCAL DEVICE | LINK PARTNER
+ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result
+ *-------|---------|-------|---------|--------------------
+ * 1 | 1 | 0 | 1 | e1000_fc_rx_pause
+ */
+ else if ((pcs_adv_reg & E1000_TXCW_PAUSE) &&
+ (pcs_adv_reg & E1000_TXCW_ASM_DIR) &&
+ !(pcs_lp_ability_reg & E1000_TXCW_PAUSE) &&
+ (pcs_lp_ability_reg & E1000_TXCW_ASM_DIR)) {
+ hw->fc.current_mode = e1000_fc_rx_pause;
+ DEBUGOUT("Flow Control = Rx PAUSE frames only.\n");
+ } else {
+ /* Per the IEEE spec, at this point flow control
+ * should be disabled.
+ */
+ hw->fc.current_mode = e1000_fc_none;
+ DEBUGOUT("Flow Control = NONE.\n");
+ }
+
+ /* Now we call a subroutine to actually force the MAC
+ * controller to use the correct flow control settings.
+ */
+ pcs_ctrl_reg = E1000_READ_REG(hw, E1000_PCS_LCTL);
+ pcs_ctrl_reg |= E1000_PCS_LCTL_FORCE_FCTRL;
+ E1000_WRITE_REG(hw, E1000_PCS_LCTL, pcs_ctrl_reg);
+
+ ret_val = e1000_force_mac_fc_generic(hw);
+ if (ret_val) {
+ DEBUGOUT("Error forcing flow control settings\n");
+ return ret_val;
+ }
+ }
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_get_speed_and_duplex_copper_generic - Retrieve current speed/duplex
+ * @hw: pointer to the HW structure
+ * @speed: stores the current speed
+ * @duplex: stores the current duplex
+ *
+ * Read the status register for the current speed/duplex and store the current
+ * speed and duplex for copper connections.
+ **/
+s32 e1000_get_speed_and_duplex_copper_generic(struct e1000_hw *hw, u16 *speed,
+ u16 *duplex)
+{
+ u32 status;
+
+ DEBUGFUNC("e1000_get_speed_and_duplex_copper_generic");
+
+ status = E1000_READ_REG(hw, E1000_STATUS);
+ if (status & E1000_STATUS_SPEED_1000) {
+ *speed = SPEED_1000;
+ DEBUGOUT("1000 Mbs, ");
+ } else if (status & E1000_STATUS_SPEED_100) {
+ *speed = SPEED_100;
+ DEBUGOUT("100 Mbs, ");
+ } else {
+ *speed = SPEED_10;
+ DEBUGOUT("10 Mbs, ");
+ }
+
+ if (status & E1000_STATUS_FD) {
+ *duplex = FULL_DUPLEX;
+ DEBUGOUT("Full Duplex\n");
+ } else {
+ *duplex = HALF_DUPLEX;
+ DEBUGOUT("Half Duplex\n");
+ }
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_get_speed_and_duplex_fiber_generic - Retrieve current speed/duplex
+ * @hw: pointer to the HW structure
+ * @speed: stores the current speed
+ * @duplex: stores the current duplex
+ *
+ * Sets the speed and duplex to gigabit full duplex (the only possible option)
+ * for fiber/serdes links.
+ **/
+s32 e1000_get_speed_and_duplex_fiber_serdes_generic(struct e1000_hw E1000_UNUSEDARG *hw,
+ u16 *speed, u16 *duplex)
+{
+ DEBUGFUNC("e1000_get_speed_and_duplex_fiber_serdes_generic");
+
+ *speed = SPEED_1000;
+ *duplex = FULL_DUPLEX;
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_get_hw_semaphore_generic - Acquire hardware semaphore
+ * @hw: pointer to the HW structure
+ *
+ * Acquire the HW semaphore to access the PHY or NVM
+ **/
+s32 e1000_get_hw_semaphore_generic(struct e1000_hw *hw)
+{
+ u32 swsm;
+ s32 timeout = hw->nvm.word_size + 1;
+ s32 i = 0;
+
+ DEBUGFUNC("e1000_get_hw_semaphore_generic");
+
+ /* Get the SW semaphore */
+ while (i < timeout) {
+ swsm = E1000_READ_REG(hw, E1000_SWSM);
+ if (!(swsm & E1000_SWSM_SMBI))
+ break;
+
+ usec_delay(50);
+ i++;
+ }
+
+ if (i == timeout) {
+ DEBUGOUT("Driver can't access device - SMBI bit is set.\n");
+ return -E1000_ERR_NVM;
+ }
+
+ /* Get the FW semaphore. */
+ for (i = 0; i < timeout; i++) {
+ swsm = E1000_READ_REG(hw, E1000_SWSM);
+ E1000_WRITE_REG(hw, E1000_SWSM, swsm | E1000_SWSM_SWESMBI);
+
+ /* Semaphore acquired if bit latched */
+ if (E1000_READ_REG(hw, E1000_SWSM) & E1000_SWSM_SWESMBI)
+ break;
+
+ usec_delay(50);
+ }
+
+ if (i == timeout) {
+ /* Release semaphores */
+ e1000_put_hw_semaphore_generic(hw);
+ DEBUGOUT("Driver can't access the NVM\n");
+ return -E1000_ERR_NVM;
+ }
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_put_hw_semaphore_generic - Release hardware semaphore
+ * @hw: pointer to the HW structure
+ *
+ * Release hardware semaphore used to access the PHY or NVM
+ **/
+void e1000_put_hw_semaphore_generic(struct e1000_hw *hw)
+{
+ u32 swsm;
+
+ DEBUGFUNC("e1000_put_hw_semaphore_generic");
+
+ swsm = E1000_READ_REG(hw, E1000_SWSM);
+
+ swsm &= ~(E1000_SWSM_SMBI | E1000_SWSM_SWESMBI);
+
+ E1000_WRITE_REG(hw, E1000_SWSM, swsm);
+}
+
+/**
+ * e1000_get_auto_rd_done_generic - Check for auto read completion
+ * @hw: pointer to the HW structure
+ *
+ * Check EEPROM for Auto Read done bit.
+ **/
+s32 e1000_get_auto_rd_done_generic(struct e1000_hw *hw)
+{
+ s32 i = 0;
+
+ DEBUGFUNC("e1000_get_auto_rd_done_generic");
+
+ while (i < AUTO_READ_DONE_TIMEOUT) {
+ if (E1000_READ_REG(hw, E1000_EECD) & E1000_EECD_AUTO_RD)
+ break;
+ msec_delay(1);
+ i++;
+ }
+
+ if (i == AUTO_READ_DONE_TIMEOUT) {
+ DEBUGOUT("Auto read by HW from NVM has not completed.\n");
+ return -E1000_ERR_RESET;
+ }
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_valid_led_default_generic - Verify a valid default LED config
+ * @hw: pointer to the HW structure
+ * @data: pointer to the NVM (EEPROM)
+ *
+ * Read the EEPROM for the current default LED configuration. If the
+ * LED configuration is not valid, set to a valid LED configuration.
+ **/
+s32 e1000_valid_led_default_generic(struct e1000_hw *hw, u16 *data)
+{
+ s32 ret_val;
+
+ DEBUGFUNC("e1000_valid_led_default_generic");
+
+ ret_val = hw->nvm.ops.read(hw, NVM_ID_LED_SETTINGS, 1, data);
+ if (ret_val) {
+ DEBUGOUT("NVM Read Error\n");
+ return ret_val;
+ }
+
+ if (*data == ID_LED_RESERVED_0000 || *data == ID_LED_RESERVED_FFFF)
+ *data = ID_LED_DEFAULT;
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_id_led_init_generic -
+ * @hw: pointer to the HW structure
+ *
+ **/
+s32 e1000_id_led_init_generic(struct e1000_hw *hw)
+{
+ struct e1000_mac_info *mac = &hw->mac;
+ s32 ret_val;
+ const u32 ledctl_mask = 0x000000FF;
+ const u32 ledctl_on = E1000_LEDCTL_MODE_LED_ON;
+ const u32 ledctl_off = E1000_LEDCTL_MODE_LED_OFF;
+ u16 data, i, temp;
+ const u16 led_mask = 0x0F;
+
+ DEBUGFUNC("e1000_id_led_init_generic");
+
+ ret_val = hw->nvm.ops.valid_led_default(hw, &data);
+ if (ret_val)
+ return ret_val;
+
+ mac->ledctl_default = E1000_READ_REG(hw, E1000_LEDCTL);
+ mac->ledctl_mode1 = mac->ledctl_default;
+ mac->ledctl_mode2 = mac->ledctl_default;
+
+ for (i = 0; i < 4; i++) {
+ temp = (data >> (i << 2)) & led_mask;
+ switch (temp) {
+ case ID_LED_ON1_DEF2:
+ case ID_LED_ON1_ON2:
+ case ID_LED_ON1_OFF2:
+ mac->ledctl_mode1 &= ~(ledctl_mask << (i << 3));
+ mac->ledctl_mode1 |= ledctl_on << (i << 3);
+ break;
+ case ID_LED_OFF1_DEF2:
+ case ID_LED_OFF1_ON2:
+ case ID_LED_OFF1_OFF2:
+ mac->ledctl_mode1 &= ~(ledctl_mask << (i << 3));
+ mac->ledctl_mode1 |= ledctl_off << (i << 3);
+ break;
+ default:
+ /* Do nothing */
+ break;
+ }
+ switch (temp) {
+ case ID_LED_DEF1_ON2:
+ case ID_LED_ON1_ON2:
+ case ID_LED_OFF1_ON2:
+ mac->ledctl_mode2 &= ~(ledctl_mask << (i << 3));
+ mac->ledctl_mode2 |= ledctl_on << (i << 3);
+ break;
+ case ID_LED_DEF1_OFF2:
+ case ID_LED_ON1_OFF2:
+ case ID_LED_OFF1_OFF2:
+ mac->ledctl_mode2 &= ~(ledctl_mask << (i << 3));
+ mac->ledctl_mode2 |= ledctl_off << (i << 3);
+ break;
+ default:
+ /* Do nothing */
+ break;
+ }
+ }
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_setup_led_generic - Configures SW controllable LED
+ * @hw: pointer to the HW structure
+ *
+ * This prepares the SW controllable LED for use and saves the current state
+ * of the LED so it can be later restored.
+ **/
+s32 e1000_setup_led_generic(struct e1000_hw *hw)
+{
+ u32 ledctl;
+
+ DEBUGFUNC("e1000_setup_led_generic");
+
+ if (hw->mac.ops.setup_led != e1000_setup_led_generic)
+ return -E1000_ERR_CONFIG;
+
+ if (hw->phy.media_type == e1000_media_type_fiber) {
+ ledctl = E1000_READ_REG(hw, E1000_LEDCTL);
+ hw->mac.ledctl_default = ledctl;
+ /* Turn off LED0 */
+ ledctl &= ~(E1000_LEDCTL_LED0_IVRT | E1000_LEDCTL_LED0_BLINK |
+ E1000_LEDCTL_LED0_MODE_MASK);
+ ledctl |= (E1000_LEDCTL_MODE_LED_OFF <<
+ E1000_LEDCTL_LED0_MODE_SHIFT);
+ E1000_WRITE_REG(hw, E1000_LEDCTL, ledctl);
+ } else if (hw->phy.media_type == e1000_media_type_copper) {
+ E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_mode1);
+ }
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_cleanup_led_generic - Set LED config to default operation
+ * @hw: pointer to the HW structure
+ *
+ * Remove the current LED configuration and set the LED configuration
+ * to the default value, saved from the EEPROM.
+ **/
+s32 e1000_cleanup_led_generic(struct e1000_hw *hw)
+{
+ DEBUGFUNC("e1000_cleanup_led_generic");
+
+ E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_default);
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_blink_led_generic - Blink LED
+ * @hw: pointer to the HW structure
+ *
+ * Blink the LEDs which are set to be on.
+ **/
+s32 e1000_blink_led_generic(struct e1000_hw *hw)
+{
+ u32 ledctl_blink = 0;
+ u32 i;
+
+ DEBUGFUNC("e1000_blink_led_generic");
+
+ if (hw->phy.media_type == e1000_media_type_fiber) {
+ /* always blink LED0 for PCI-E fiber */
+ ledctl_blink = E1000_LEDCTL_LED0_BLINK |
+ (E1000_LEDCTL_MODE_LED_ON << E1000_LEDCTL_LED0_MODE_SHIFT);
+ } else {
+ /* Set the blink bit for each LED that's "on" (0x0E)
+ * (or "off" if inverted) in ledctl_mode2. The blink
+ * logic in hardware only works when mode is set to "on"
+ * so it must be changed accordingly when the mode is
+ * "off" and inverted.
+ */
+ ledctl_blink = hw->mac.ledctl_mode2;
+ for (i = 0; i < 32; i += 8) {
+ u32 mode = (hw->mac.ledctl_mode2 >> i) &
+ E1000_LEDCTL_LED0_MODE_MASK;
+ u32 led_default = hw->mac.ledctl_default >> i;
+
+ if ((!(led_default & E1000_LEDCTL_LED0_IVRT) &&
+ (mode == E1000_LEDCTL_MODE_LED_ON)) ||
+ ((led_default & E1000_LEDCTL_LED0_IVRT) &&
+ (mode == E1000_LEDCTL_MODE_LED_OFF))) {
+ ledctl_blink &=
+ ~(E1000_LEDCTL_LED0_MODE_MASK << i);
+ ledctl_blink |= (E1000_LEDCTL_LED0_BLINK |
+ E1000_LEDCTL_MODE_LED_ON) << i;
+ }
+ }
+ }
+
+ E1000_WRITE_REG(hw, E1000_LEDCTL, ledctl_blink);
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_led_on_generic - Turn LED on
+ * @hw: pointer to the HW structure
+ *
+ * Turn LED on.
+ **/
+s32 e1000_led_on_generic(struct e1000_hw *hw)
+{
+ u32 ctrl;
+
+ DEBUGFUNC("e1000_led_on_generic");
+
+ switch (hw->phy.media_type) {
+ case e1000_media_type_fiber:
+ ctrl = E1000_READ_REG(hw, E1000_CTRL);
+ ctrl &= ~E1000_CTRL_SWDPIN0;
+ ctrl |= E1000_CTRL_SWDPIO0;
+ E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
+ break;
+ case e1000_media_type_copper:
+ E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_mode2);
+ break;
+ default:
+ break;
+ }
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_led_off_generic - Turn LED off
+ * @hw: pointer to the HW structure
+ *
+ * Turn LED off.
+ **/
+s32 e1000_led_off_generic(struct e1000_hw *hw)
+{
+ u32 ctrl;
+
+ DEBUGFUNC("e1000_led_off_generic");
+
+ switch (hw->phy.media_type) {
+ case e1000_media_type_fiber:
+ ctrl = E1000_READ_REG(hw, E1000_CTRL);
+ ctrl |= E1000_CTRL_SWDPIN0;
+ ctrl |= E1000_CTRL_SWDPIO0;
+ E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
+ break;
+ case e1000_media_type_copper:
+ E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_mode1);
+ break;
+ default:
+ break;
+ }
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_set_pcie_no_snoop_generic - Set PCI-express capabilities
+ * @hw: pointer to the HW structure
+ * @no_snoop: bitmap of snoop events
+ *
+ * Set the PCI-express register to snoop for events enabled in 'no_snoop'.
+ **/
+void e1000_set_pcie_no_snoop_generic(struct e1000_hw *hw, u32 no_snoop)
+{
+ u32 gcr;
+
+ DEBUGFUNC("e1000_set_pcie_no_snoop_generic");
+
+ if (no_snoop) {
+ gcr = E1000_READ_REG(hw, E1000_GCR);
+ gcr &= ~(PCIE_NO_SNOOP_ALL);
+ gcr |= no_snoop;
+ E1000_WRITE_REG(hw, E1000_GCR, gcr);
+ }
+}
+
+/**
+ * e1000_disable_pcie_master_generic - Disables PCI-express master access
+ * @hw: pointer to the HW structure
+ *
+ * Returns E1000_SUCCESS if successful, else returns -10
+ * (-E1000_ERR_MASTER_REQUESTS_PENDING) if master disable bit has not caused
+ * the master requests to be disabled.
+ *
+ * Disables PCI-Express master access and verifies there are no pending
+ * requests.
+ **/
+s32 e1000_disable_pcie_master_generic(struct e1000_hw *hw)
+{
+ u32 ctrl;
+ s32 timeout = MASTER_DISABLE_TIMEOUT;
+
+ DEBUGFUNC("e1000_disable_pcie_master_generic");
+
+ ctrl = E1000_READ_REG(hw, E1000_CTRL);
+ ctrl |= E1000_CTRL_GIO_MASTER_DISABLE;
+ E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
+
+ while (timeout) {
+ if (!(E1000_READ_REG(hw, E1000_STATUS) &
+ E1000_STATUS_GIO_MASTER_ENABLE))
+ break;
+ usec_delay(100);
+ timeout--;
+ }
+
+ if (!timeout) {
+ DEBUGOUT("Master requests are pending.\n");
+ return -E1000_ERR_MASTER_REQUESTS_PENDING;
+ }
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_reset_adaptive_generic - Reset Adaptive Interframe Spacing
+ * @hw: pointer to the HW structure
+ *
+ * Reset the Adaptive Interframe Spacing throttle to default values.
+ **/
+void e1000_reset_adaptive_generic(struct e1000_hw *hw)
+{
+ struct e1000_mac_info *mac = &hw->mac;
+
+ DEBUGFUNC("e1000_reset_adaptive_generic");
+
+ if (!mac->adaptive_ifs) {
+ DEBUGOUT("Not in Adaptive IFS mode!\n");
+ return;
+ }
+
+ mac->current_ifs_val = 0;
+ mac->ifs_min_val = IFS_MIN;
+ mac->ifs_max_val = IFS_MAX;
+ mac->ifs_step_size = IFS_STEP;
+ mac->ifs_ratio = IFS_RATIO;
+
+ mac->in_ifs_mode = false;
+ E1000_WRITE_REG(hw, E1000_AIT, 0);
+}
+
+/**
+ * e1000_update_adaptive_generic - Update Adaptive Interframe Spacing
+ * @hw: pointer to the HW structure
+ *
+ * Update the Adaptive Interframe Spacing Throttle value based on the
+ * time between transmitted packets and time between collisions.
+ **/
+void e1000_update_adaptive_generic(struct e1000_hw *hw)
+{
+ struct e1000_mac_info *mac = &hw->mac;
+
+ DEBUGFUNC("e1000_update_adaptive_generic");
+
+ if (!mac->adaptive_ifs) {
+ DEBUGOUT("Not in Adaptive IFS mode!\n");
+ return;
+ }
+
+ if ((mac->collision_delta * mac->ifs_ratio) > mac->tx_packet_delta) {
+ if (mac->tx_packet_delta > MIN_NUM_XMITS) {
+ mac->in_ifs_mode = true;
+ if (mac->current_ifs_val < mac->ifs_max_val) {
+ if (!mac->current_ifs_val)
+ mac->current_ifs_val = mac->ifs_min_val;
+ else
+ mac->current_ifs_val +=
+ mac->ifs_step_size;
+ E1000_WRITE_REG(hw, E1000_AIT,
+ mac->current_ifs_val);
+ }
+ }
+ } else {
+ if (mac->in_ifs_mode &&
+ (mac->tx_packet_delta <= MIN_NUM_XMITS)) {
+ mac->current_ifs_val = 0;
+ mac->in_ifs_mode = false;
+ E1000_WRITE_REG(hw, E1000_AIT, 0);
+ }
+ }
+}
+
+/**
+ * e1000_validate_mdi_setting_generic - Verify MDI/MDIx settings
+ * @hw: pointer to the HW structure
+ *
+ * Verify that when not using auto-negotiation that MDI/MDIx is correctly
+ * set, which is forced to MDI mode only.
+ **/
+static s32 e1000_validate_mdi_setting_generic(struct e1000_hw *hw)
+{
+ DEBUGFUNC("e1000_validate_mdi_setting_generic");
+
+ if (!hw->mac.autoneg && (hw->phy.mdix == 0 || hw->phy.mdix == 3)) {
+ DEBUGOUT("Invalid MDI setting detected\n");
+ hw->phy.mdix = 1;
+ return -E1000_ERR_CONFIG;
+ }
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_validate_mdi_setting_crossover_generic - Verify MDI/MDIx settings
+ * @hw: pointer to the HW structure
+ *
+ * Validate the MDI/MDIx setting, allowing for auto-crossover during forced
+ * operation.
+ **/
+s32 e1000_validate_mdi_setting_crossover_generic(struct e1000_hw E1000_UNUSEDARG *hw)
+{
+ DEBUGFUNC("e1000_validate_mdi_setting_crossover_generic");
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_write_8bit_ctrl_reg_generic - Write a 8bit CTRL register
+ * @hw: pointer to the HW structure
+ * @reg: 32bit register offset such as E1000_SCTL
+ * @offset: register offset to write to
+ * @data: data to write at register offset
+ *
+ * Writes an address/data control type register. There are several of these
+ * and they all have the format address << 8 | data and bit 31 is polled for
+ * completion.
+ **/
+s32 e1000_write_8bit_ctrl_reg_generic(struct e1000_hw *hw, u32 reg,
+ u32 offset, u8 data)
+{
+ u32 i, regvalue = 0;
+
+ DEBUGFUNC("e1000_write_8bit_ctrl_reg_generic");
+
+ /* Set up the address and data */
+ regvalue = ((u32)data) | (offset << E1000_GEN_CTL_ADDRESS_SHIFT);
+ E1000_WRITE_REG(hw, reg, regvalue);
+
+ /* Poll the ready bit to see if the MDI read completed */
+ for (i = 0; i < E1000_GEN_POLL_TIMEOUT; i++) {
+ usec_delay(5);
+ regvalue = E1000_READ_REG(hw, reg);
+ if (regvalue & E1000_GEN_CTL_READY)
+ break;
+ }
+ if (!(regvalue & E1000_GEN_CTL_READY)) {
+ DEBUGOUT1("Reg %08x did not indicate ready\n", reg);
+ return -E1000_ERR_PHY;
+ }
+
+ return E1000_SUCCESS;
+}
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.h
new file mode 100644
index 00000000..6a1b0f52
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.h
@@ -0,0 +1,80 @@
+/*******************************************************************************
+
+ Intel(R) Gigabit Ethernet Linux driver
+ Copyright(c) 2007-2013 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _E1000_MAC_H_
+#define _E1000_MAC_H_
+
+void e1000_init_mac_ops_generic(struct e1000_hw *hw);
+void e1000_null_mac_generic(struct e1000_hw *hw);
+s32 e1000_null_ops_generic(struct e1000_hw *hw);
+s32 e1000_null_link_info(struct e1000_hw *hw, u16 *s, u16 *d);
+bool e1000_null_mng_mode(struct e1000_hw *hw);
+void e1000_null_update_mc(struct e1000_hw *hw, u8 *h, u32 a);
+void e1000_null_write_vfta(struct e1000_hw *hw, u32 a, u32 b);
+void e1000_null_rar_set(struct e1000_hw *hw, u8 *h, u32 a);
+s32 e1000_blink_led_generic(struct e1000_hw *hw);
+s32 e1000_check_for_copper_link_generic(struct e1000_hw *hw);
+s32 e1000_check_for_fiber_link_generic(struct e1000_hw *hw);
+s32 e1000_check_for_serdes_link_generic(struct e1000_hw *hw);
+s32 e1000_cleanup_led_generic(struct e1000_hw *hw);
+s32 e1000_config_fc_after_link_up_generic(struct e1000_hw *hw);
+s32 e1000_disable_pcie_master_generic(struct e1000_hw *hw);
+s32 e1000_force_mac_fc_generic(struct e1000_hw *hw);
+s32 e1000_get_auto_rd_done_generic(struct e1000_hw *hw);
+s32 e1000_get_bus_info_pcie_generic(struct e1000_hw *hw);
+void e1000_set_lan_id_single_port(struct e1000_hw *hw);
+s32 e1000_get_hw_semaphore_generic(struct e1000_hw *hw);
+s32 e1000_get_speed_and_duplex_copper_generic(struct e1000_hw *hw, u16 *speed,
+ u16 *duplex);
+s32 e1000_get_speed_and_duplex_fiber_serdes_generic(struct e1000_hw *hw,
+ u16 *speed, u16 *duplex);
+s32 e1000_id_led_init_generic(struct e1000_hw *hw);
+s32 e1000_led_on_generic(struct e1000_hw *hw);
+s32 e1000_led_off_generic(struct e1000_hw *hw);
+void e1000_update_mc_addr_list_generic(struct e1000_hw *hw,
+ u8 *mc_addr_list, u32 mc_addr_count);
+s32 e1000_set_fc_watermarks_generic(struct e1000_hw *hw);
+s32 e1000_setup_fiber_serdes_link_generic(struct e1000_hw *hw);
+s32 e1000_setup_led_generic(struct e1000_hw *hw);
+s32 e1000_setup_link_generic(struct e1000_hw *hw);
+s32 e1000_validate_mdi_setting_crossover_generic(struct e1000_hw *hw);
+s32 e1000_write_8bit_ctrl_reg_generic(struct e1000_hw *hw, u32 reg,
+ u32 offset, u8 data);
+
+u32 e1000_hash_mc_addr_generic(struct e1000_hw *hw, u8 *mc_addr);
+
+void e1000_clear_hw_cntrs_base_generic(struct e1000_hw *hw);
+void e1000_clear_vfta_generic(struct e1000_hw *hw);
+void e1000_init_rx_addrs_generic(struct e1000_hw *hw, u16 rar_count);
+void e1000_put_hw_semaphore_generic(struct e1000_hw *hw);
+s32 e1000_check_alt_mac_addr_generic(struct e1000_hw *hw);
+void e1000_reset_adaptive_generic(struct e1000_hw *hw);
+void e1000_set_pcie_no_snoop_generic(struct e1000_hw *hw, u32 no_snoop);
+void e1000_update_adaptive_generic(struct e1000_hw *hw);
+void e1000_write_vfta_generic(struct e1000_hw *hw, u32 offset, u32 value);
+
+#endif
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.c
new file mode 100644
index 00000000..a1700398
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.c
@@ -0,0 +1,554 @@
+/*******************************************************************************
+
+ Intel(R) Gigabit Ethernet Linux driver
+ Copyright(c) 2007-2013 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#include "e1000_api.h"
+
+/**
+ * e1000_calculate_checksum - Calculate checksum for buffer
+ * @buffer: pointer to EEPROM
+ * @length: size of EEPROM to calculate a checksum for
+ *
+ * Calculates the checksum for some buffer on a specified length. The
+ * checksum calculated is returned.
+ **/
+u8 e1000_calculate_checksum(u8 *buffer, u32 length)
+{
+ u32 i;
+ u8 sum = 0;
+
+ DEBUGFUNC("e1000_calculate_checksum");
+
+ if (!buffer)
+ return 0;
+
+ for (i = 0; i < length; i++)
+ sum += buffer[i];
+
+ return (u8) (0 - sum);
+}
+
+/**
+ * e1000_mng_enable_host_if_generic - Checks host interface is enabled
+ * @hw: pointer to the HW structure
+ *
+ * Returns E1000_success upon success, else E1000_ERR_HOST_INTERFACE_COMMAND
+ *
+ * This function checks whether the HOST IF is enabled for command operation
+ * and also checks whether the previous command is completed. It busy waits
+ * in case of previous command is not completed.
+ **/
+s32 e1000_mng_enable_host_if_generic(struct e1000_hw *hw)
+{
+ u32 hicr;
+ u8 i;
+
+ DEBUGFUNC("e1000_mng_enable_host_if_generic");
+
+ if (!hw->mac.arc_subsystem_valid) {
+ DEBUGOUT("ARC subsystem not valid.\n");
+ return -E1000_ERR_HOST_INTERFACE_COMMAND;
+ }
+
+ /* Check that the host interface is enabled. */
+ hicr = E1000_READ_REG(hw, E1000_HICR);
+ if (!(hicr & E1000_HICR_EN)) {
+ DEBUGOUT("E1000_HOST_EN bit disabled.\n");
+ return -E1000_ERR_HOST_INTERFACE_COMMAND;
+ }
+ /* check the previous command is completed */
+ for (i = 0; i < E1000_MNG_DHCP_COMMAND_TIMEOUT; i++) {
+ hicr = E1000_READ_REG(hw, E1000_HICR);
+ if (!(hicr & E1000_HICR_C))
+ break;
+ msec_delay_irq(1);
+ }
+
+ if (i == E1000_MNG_DHCP_COMMAND_TIMEOUT) {
+ DEBUGOUT("Previous command timeout failed .\n");
+ return -E1000_ERR_HOST_INTERFACE_COMMAND;
+ }
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_check_mng_mode_generic - Generic check management mode
+ * @hw: pointer to the HW structure
+ *
+ * Reads the firmware semaphore register and returns true (>0) if
+ * manageability is enabled, else false (0).
+ **/
+bool e1000_check_mng_mode_generic(struct e1000_hw *hw)
+{
+ u32 fwsm = E1000_READ_REG(hw, E1000_FWSM);
+
+ DEBUGFUNC("e1000_check_mng_mode_generic");
+
+
+ return (fwsm & E1000_FWSM_MODE_MASK) ==
+ (E1000_MNG_IAMT_MODE << E1000_FWSM_MODE_SHIFT);
+}
+
+/**
+ * e1000_enable_tx_pkt_filtering_generic - Enable packet filtering on Tx
+ * @hw: pointer to the HW structure
+ *
+ * Enables packet filtering on transmit packets if manageability is enabled
+ * and host interface is enabled.
+ **/
+bool e1000_enable_tx_pkt_filtering_generic(struct e1000_hw *hw)
+{
+ struct e1000_host_mng_dhcp_cookie *hdr = &hw->mng_cookie;
+ u32 *buffer = (u32 *)&hw->mng_cookie;
+ u32 offset;
+ s32 ret_val, hdr_csum, csum;
+ u8 i, len;
+
+ DEBUGFUNC("e1000_enable_tx_pkt_filtering_generic");
+
+ hw->mac.tx_pkt_filtering = true;
+
+ /* No manageability, no filtering */
+ if (!hw->mac.ops.check_mng_mode(hw)) {
+ hw->mac.tx_pkt_filtering = false;
+ return hw->mac.tx_pkt_filtering;
+ }
+
+ /* If we can't read from the host interface for whatever
+ * reason, disable filtering.
+ */
+ ret_val = e1000_mng_enable_host_if_generic(hw);
+ if (ret_val != E1000_SUCCESS) {
+ hw->mac.tx_pkt_filtering = false;
+ return hw->mac.tx_pkt_filtering;
+ }
+
+ /* Read in the header. Length and offset are in dwords. */
+ len = E1000_MNG_DHCP_COOKIE_LENGTH >> 2;
+ offset = E1000_MNG_DHCP_COOKIE_OFFSET >> 2;
+ for (i = 0; i < len; i++)
+ *(buffer + i) = E1000_READ_REG_ARRAY_DWORD(hw, E1000_HOST_IF,
+ offset + i);
+ hdr_csum = hdr->checksum;
+ hdr->checksum = 0;
+ csum = e1000_calculate_checksum((u8 *)hdr,
+ E1000_MNG_DHCP_COOKIE_LENGTH);
+ /* If either the checksums or signature don't match, then
+ * the cookie area isn't considered valid, in which case we
+ * take the safe route of assuming Tx filtering is enabled.
+ */
+ if ((hdr_csum != csum) || (hdr->signature != E1000_IAMT_SIGNATURE)) {
+ hw->mac.tx_pkt_filtering = true;
+ return hw->mac.tx_pkt_filtering;
+ }
+
+ /* Cookie area is valid, make the final check for filtering. */
+ if (!(hdr->status & E1000_MNG_DHCP_COOKIE_STATUS_PARSING))
+ hw->mac.tx_pkt_filtering = false;
+
+ return hw->mac.tx_pkt_filtering;
+}
+
+/**
+ * e1000_mng_write_cmd_header_generic - Writes manageability command header
+ * @hw: pointer to the HW structure
+ * @hdr: pointer to the host interface command header
+ *
+ * Writes the command header after does the checksum calculation.
+ **/
+s32 e1000_mng_write_cmd_header_generic(struct e1000_hw *hw,
+ struct e1000_host_mng_command_header *hdr)
+{
+ u16 i, length = sizeof(struct e1000_host_mng_command_header);
+
+ DEBUGFUNC("e1000_mng_write_cmd_header_generic");
+
+ /* Write the whole command header structure with new checksum. */
+
+ hdr->checksum = e1000_calculate_checksum((u8 *)hdr, length);
+
+ length >>= 2;
+ /* Write the relevant command block into the ram area. */
+ for (i = 0; i < length; i++) {
+ E1000_WRITE_REG_ARRAY_DWORD(hw, E1000_HOST_IF, i,
+ *((u32 *) hdr + i));
+ E1000_WRITE_FLUSH(hw);
+ }
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_mng_host_if_write_generic - Write to the manageability host interface
+ * @hw: pointer to the HW structure
+ * @buffer: pointer to the host interface buffer
+ * @length: size of the buffer
+ * @offset: location in the buffer to write to
+ * @sum: sum of the data (not checksum)
+ *
+ * This function writes the buffer content at the offset given on the host if.
+ * It also does alignment considerations to do the writes in most efficient
+ * way. Also fills up the sum of the buffer in *buffer parameter.
+ **/
+s32 e1000_mng_host_if_write_generic(struct e1000_hw *hw, u8 *buffer,
+ u16 length, u16 offset, u8 *sum)
+{
+ u8 *tmp;
+ u8 *bufptr = buffer;
+ u32 data = 0;
+ u16 remaining, i, j, prev_bytes;
+
+ DEBUGFUNC("e1000_mng_host_if_write_generic");
+
+ /* sum = only sum of the data and it is not checksum */
+
+ if (length == 0 || offset + length > E1000_HI_MAX_MNG_DATA_LENGTH)
+ return -E1000_ERR_PARAM;
+
+ tmp = (u8 *)&data;
+ prev_bytes = offset & 0x3;
+ offset >>= 2;
+
+ if (prev_bytes) {
+ data = E1000_READ_REG_ARRAY_DWORD(hw, E1000_HOST_IF, offset);
+ for (j = prev_bytes; j < sizeof(u32); j++) {
+ *(tmp + j) = *bufptr++;
+ *sum += *(tmp + j);
+ }
+ E1000_WRITE_REG_ARRAY_DWORD(hw, E1000_HOST_IF, offset, data);
+ length -= j - prev_bytes;
+ offset++;
+ }
+
+ remaining = length & 0x3;
+ length -= remaining;
+
+ /* Calculate length in DWORDs */
+ length >>= 2;
+
+ /* The device driver writes the relevant command block into the
+ * ram area.
+ */
+ for (i = 0; i < length; i++) {
+ for (j = 0; j < sizeof(u32); j++) {
+ *(tmp + j) = *bufptr++;
+ *sum += *(tmp + j);
+ }
+
+ E1000_WRITE_REG_ARRAY_DWORD(hw, E1000_HOST_IF, offset + i,
+ data);
+ }
+ if (remaining) {
+ for (j = 0; j < sizeof(u32); j++) {
+ if (j < remaining)
+ *(tmp + j) = *bufptr++;
+ else
+ *(tmp + j) = 0;
+
+ *sum += *(tmp + j);
+ }
+ E1000_WRITE_REG_ARRAY_DWORD(hw, E1000_HOST_IF, offset + i,
+ data);
+ }
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_mng_write_dhcp_info_generic - Writes DHCP info to host interface
+ * @hw: pointer to the HW structure
+ * @buffer: pointer to the host interface
+ * @length: size of the buffer
+ *
+ * Writes the DHCP information to the host interface.
+ **/
+s32 e1000_mng_write_dhcp_info_generic(struct e1000_hw *hw, u8 *buffer,
+ u16 length)
+{
+ struct e1000_host_mng_command_header hdr;
+ s32 ret_val;
+ u32 hicr;
+
+ DEBUGFUNC("e1000_mng_write_dhcp_info_generic");
+
+ hdr.command_id = E1000_MNG_DHCP_TX_PAYLOAD_CMD;
+ hdr.command_length = length;
+ hdr.reserved1 = 0;
+ hdr.reserved2 = 0;
+ hdr.checksum = 0;
+
+ /* Enable the host interface */
+ ret_val = e1000_mng_enable_host_if_generic(hw);
+ if (ret_val)
+ return ret_val;
+
+ /* Populate the host interface with the contents of "buffer". */
+ ret_val = e1000_mng_host_if_write_generic(hw, buffer, length,
+ sizeof(hdr), &(hdr.checksum));
+ if (ret_val)
+ return ret_val;
+
+ /* Write the manageability command header */
+ ret_val = e1000_mng_write_cmd_header_generic(hw, &hdr);
+ if (ret_val)
+ return ret_val;
+
+ /* Tell the ARC a new command is pending. */
+ hicr = E1000_READ_REG(hw, E1000_HICR);
+ E1000_WRITE_REG(hw, E1000_HICR, hicr | E1000_HICR_C);
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_enable_mng_pass_thru - Check if management passthrough is needed
+ * @hw: pointer to the HW structure
+ *
+ * Verifies the hardware needs to leave interface enabled so that frames can
+ * be directed to and from the management interface.
+ **/
+bool e1000_enable_mng_pass_thru(struct e1000_hw *hw)
+{
+ u32 manc;
+ u32 fwsm, factps;
+
+ DEBUGFUNC("e1000_enable_mng_pass_thru");
+
+ if (!hw->mac.asf_firmware_present)
+ return false;
+
+ manc = E1000_READ_REG(hw, E1000_MANC);
+
+ if (!(manc & E1000_MANC_RCV_TCO_EN))
+ return false;
+
+ if (hw->mac.has_fwsm) {
+ fwsm = E1000_READ_REG(hw, E1000_FWSM);
+ factps = E1000_READ_REG(hw, E1000_FACTPS);
+
+ if (!(factps & E1000_FACTPS_MNGCG) &&
+ ((fwsm & E1000_FWSM_MODE_MASK) ==
+ (e1000_mng_mode_pt << E1000_FWSM_MODE_SHIFT)))
+ return true;
+ } else if ((manc & E1000_MANC_SMBUS_EN) &&
+ !(manc & E1000_MANC_ASF_EN)) {
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * e1000_host_interface_command - Writes buffer to host interface
+ * @hw: pointer to the HW structure
+ * @buffer: contains a command to write
+ * @length: the byte length of the buffer, must be multiple of 4 bytes
+ *
+ * Writes a buffer to the Host Interface. Upon success, returns E1000_SUCCESS
+ * else returns E1000_ERR_HOST_INTERFACE_COMMAND.
+ **/
+s32 e1000_host_interface_command(struct e1000_hw *hw, u8 *buffer, u32 length)
+{
+ u32 hicr, i;
+
+ DEBUGFUNC("e1000_host_interface_command");
+
+ if (!(hw->mac.arc_subsystem_valid)) {
+ DEBUGOUT("Hardware doesn't support host interface command.\n");
+ return E1000_SUCCESS;
+ }
+
+ if (!hw->mac.asf_firmware_present) {
+ DEBUGOUT("Firmware is not present.\n");
+ return E1000_SUCCESS;
+ }
+
+ if (length == 0 || length & 0x3 ||
+ length > E1000_HI_MAX_BLOCK_BYTE_LENGTH) {
+ DEBUGOUT("Buffer length failure.\n");
+ return -E1000_ERR_HOST_INTERFACE_COMMAND;
+ }
+
+ /* Check that the host interface is enabled. */
+ hicr = E1000_READ_REG(hw, E1000_HICR);
+ if (!(hicr & E1000_HICR_EN)) {
+ DEBUGOUT("E1000_HOST_EN bit disabled.\n");
+ return -E1000_ERR_HOST_INTERFACE_COMMAND;
+ }
+
+ /* Calculate length in DWORDs */
+ length >>= 2;
+
+ /* The device driver writes the relevant command block
+ * into the ram area.
+ */
+ for (i = 0; i < length; i++)
+ E1000_WRITE_REG_ARRAY_DWORD(hw, E1000_HOST_IF, i,
+ *((u32 *)buffer + i));
+
+ /* Setting this bit tells the ARC that a new command is pending. */
+ E1000_WRITE_REG(hw, E1000_HICR, hicr | E1000_HICR_C);
+
+ for (i = 0; i < E1000_HI_COMMAND_TIMEOUT; i++) {
+ hicr = E1000_READ_REG(hw, E1000_HICR);
+ if (!(hicr & E1000_HICR_C))
+ break;
+ msec_delay(1);
+ }
+
+ /* Check command successful completion. */
+ if (i == E1000_HI_COMMAND_TIMEOUT ||
+ (!(E1000_READ_REG(hw, E1000_HICR) & E1000_HICR_SV))) {
+ DEBUGOUT("Command has failed with no status valid.\n");
+ return -E1000_ERR_HOST_INTERFACE_COMMAND;
+ }
+
+ for (i = 0; i < length; i++)
+ *((u32 *)buffer + i) = E1000_READ_REG_ARRAY_DWORD(hw,
+ E1000_HOST_IF,
+ i);
+
+ return E1000_SUCCESS;
+}
+/**
+ * e1000_load_firmware - Writes proxy FW code buffer to host interface
+ * and execute.
+ * @hw: pointer to the HW structure
+ * @buffer: contains a firmware to write
+ * @length: the byte length of the buffer, must be multiple of 4 bytes
+ *
+ * Upon success returns E1000_SUCCESS, returns E1000_ERR_CONFIG if not enabled
+ * in HW else returns E1000_ERR_HOST_INTERFACE_COMMAND.
+ **/
+s32 e1000_load_firmware(struct e1000_hw *hw, u8 *buffer, u32 length)
+{
+ u32 hicr, hibba, fwsm, icr, i;
+
+ DEBUGFUNC("e1000_load_firmware");
+
+ if (hw->mac.type < e1000_i210) {
+ DEBUGOUT("Hardware doesn't support loading FW by the driver\n");
+ return -E1000_ERR_CONFIG;
+ }
+
+ /* Check that the host interface is enabled. */
+ hicr = E1000_READ_REG(hw, E1000_HICR);
+ if (!(hicr & E1000_HICR_EN)) {
+ DEBUGOUT("E1000_HOST_EN bit disabled.\n");
+ return -E1000_ERR_CONFIG;
+ }
+ if (!(hicr & E1000_HICR_MEMORY_BASE_EN)) {
+ DEBUGOUT("E1000_HICR_MEMORY_BASE_EN bit disabled.\n");
+ return -E1000_ERR_CONFIG;
+ }
+
+ if (length == 0 || length & 0x3 || length > E1000_HI_FW_MAX_LENGTH) {
+ DEBUGOUT("Buffer length failure.\n");
+ return -E1000_ERR_INVALID_ARGUMENT;
+ }
+
+ /* Clear notification from ROM-FW by reading ICR register */
+ icr = E1000_READ_REG(hw, E1000_ICR_V2);
+
+ /* Reset ROM-FW */
+ hicr = E1000_READ_REG(hw, E1000_HICR);
+ hicr |= E1000_HICR_FW_RESET_ENABLE;
+ E1000_WRITE_REG(hw, E1000_HICR, hicr);
+ hicr |= E1000_HICR_FW_RESET;
+ E1000_WRITE_REG(hw, E1000_HICR, hicr);
+ E1000_WRITE_FLUSH(hw);
+
+ /* Wait till MAC notifies about its readiness after ROM-FW reset */
+ for (i = 0; i < (E1000_HI_COMMAND_TIMEOUT * 2); i++) {
+ icr = E1000_READ_REG(hw, E1000_ICR_V2);
+ if (icr & E1000_ICR_MNG)
+ break;
+ msec_delay(1);
+ }
+
+ /* Check for timeout */
+ if (i == E1000_HI_COMMAND_TIMEOUT) {
+ DEBUGOUT("FW reset failed.\n");
+ return -E1000_ERR_HOST_INTERFACE_COMMAND;
+ }
+
+ /* Wait till MAC is ready to accept new FW code */
+ for (i = 0; i < E1000_HI_COMMAND_TIMEOUT; i++) {
+ fwsm = E1000_READ_REG(hw, E1000_FWSM);
+ if ((fwsm & E1000_FWSM_FW_VALID) &&
+ ((fwsm & E1000_FWSM_MODE_MASK) >> E1000_FWSM_MODE_SHIFT ==
+ E1000_FWSM_HI_EN_ONLY_MODE))
+ break;
+ msec_delay(1);
+ }
+
+ /* Check for timeout */
+ if (i == E1000_HI_COMMAND_TIMEOUT) {
+ DEBUGOUT("FW reset failed.\n");
+ return -E1000_ERR_HOST_INTERFACE_COMMAND;
+ }
+
+ /* Calculate length in DWORDs */
+ length >>= 2;
+
+ /* The device driver writes the relevant FW code block
+ * into the ram area in DWORDs via 1kB ram addressing window.
+ */
+ for (i = 0; i < length; i++) {
+ if (!(i % E1000_HI_FW_BLOCK_DWORD_LENGTH)) {
+ /* Point to correct 1kB ram window */
+ hibba = E1000_HI_FW_BASE_ADDRESS +
+ ((E1000_HI_FW_BLOCK_DWORD_LENGTH << 2) *
+ (i / E1000_HI_FW_BLOCK_DWORD_LENGTH));
+
+ E1000_WRITE_REG(hw, E1000_HIBBA, hibba);
+ }
+
+ E1000_WRITE_REG_ARRAY_DWORD(hw, E1000_HOST_IF,
+ i % E1000_HI_FW_BLOCK_DWORD_LENGTH,
+ *((u32 *)buffer + i));
+ }
+
+ /* Setting this bit tells the ARC that a new FW is ready to execute. */
+ hicr = E1000_READ_REG(hw, E1000_HICR);
+ E1000_WRITE_REG(hw, E1000_HICR, hicr | E1000_HICR_C);
+
+ for (i = 0; i < E1000_HI_COMMAND_TIMEOUT; i++) {
+ hicr = E1000_READ_REG(hw, E1000_HICR);
+ if (!(hicr & E1000_HICR_C))
+ break;
+ msec_delay(1);
+ }
+
+ /* Check for successful FW start. */
+ if (i == E1000_HI_COMMAND_TIMEOUT) {
+ DEBUGOUT("New FW did not start within timeout period.\n");
+ return -E1000_ERR_HOST_INTERFACE_COMMAND;
+ }
+
+ return E1000_SUCCESS;
+}
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.h
new file mode 100644
index 00000000..c94b2185
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.h
@@ -0,0 +1,89 @@
+/*******************************************************************************
+
+ Intel(R) Gigabit Ethernet Linux driver
+ Copyright(c) 2007-2013 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _E1000_MANAGE_H_
+#define _E1000_MANAGE_H_
+
+bool e1000_check_mng_mode_generic(struct e1000_hw *hw);
+bool e1000_enable_tx_pkt_filtering_generic(struct e1000_hw *hw);
+s32 e1000_mng_enable_host_if_generic(struct e1000_hw *hw);
+s32 e1000_mng_host_if_write_generic(struct e1000_hw *hw, u8 *buffer,
+ u16 length, u16 offset, u8 *sum);
+s32 e1000_mng_write_cmd_header_generic(struct e1000_hw *hw,
+ struct e1000_host_mng_command_header *hdr);
+s32 e1000_mng_write_dhcp_info_generic(struct e1000_hw *hw,
+ u8 *buffer, u16 length);
+bool e1000_enable_mng_pass_thru(struct e1000_hw *hw);
+u8 e1000_calculate_checksum(u8 *buffer, u32 length);
+s32 e1000_host_interface_command(struct e1000_hw *hw, u8 *buffer, u32 length);
+s32 e1000_load_firmware(struct e1000_hw *hw, u8 *buffer, u32 length);
+
+enum e1000_mng_mode {
+ e1000_mng_mode_none = 0,
+ e1000_mng_mode_asf,
+ e1000_mng_mode_pt,
+ e1000_mng_mode_ipmi,
+ e1000_mng_mode_host_if_only
+};
+
+#define E1000_FACTPS_MNGCG 0x20000000
+
+#define E1000_FWSM_MODE_MASK 0xE
+#define E1000_FWSM_MODE_SHIFT 1
+#define E1000_FWSM_FW_VALID 0x00008000
+#define E1000_FWSM_HI_EN_ONLY_MODE 0x4
+
+#define E1000_MNG_IAMT_MODE 0x3
+#define E1000_MNG_DHCP_COOKIE_LENGTH 0x10
+#define E1000_MNG_DHCP_COOKIE_OFFSET 0x6F0
+#define E1000_MNG_DHCP_COMMAND_TIMEOUT 10
+#define E1000_MNG_DHCP_TX_PAYLOAD_CMD 64
+#define E1000_MNG_DHCP_COOKIE_STATUS_PARSING 0x1
+#define E1000_MNG_DHCP_COOKIE_STATUS_VLAN 0x2
+
+#define E1000_VFTA_ENTRY_SHIFT 5
+#define E1000_VFTA_ENTRY_MASK 0x7F
+#define E1000_VFTA_ENTRY_BIT_SHIFT_MASK 0x1F
+
+#define E1000_HI_MAX_BLOCK_BYTE_LENGTH 1792 /* Num of bytes in range */
+#define E1000_HI_MAX_BLOCK_DWORD_LENGTH 448 /* Num of dwords in range */
+#define E1000_HI_COMMAND_TIMEOUT 500 /* Process HI cmd limit */
+#define E1000_HI_FW_BASE_ADDRESS 0x10000
+#define E1000_HI_FW_MAX_LENGTH (64 * 1024) /* Num of bytes */
+#define E1000_HI_FW_BLOCK_DWORD_LENGTH 256 /* Num of DWORDs per page */
+#define E1000_HICR_MEMORY_BASE_EN 0x200 /* MB Enable bit - RO */
+#define E1000_HICR_EN 0x01 /* Enable bit - RO */
+/* Driver sets this bit when done to put command in RAM */
+#define E1000_HICR_C 0x02
+#define E1000_HICR_SV 0x04 /* Status Validity */
+#define E1000_HICR_FW_RESET_ENABLE 0x40
+#define E1000_HICR_FW_RESET 0x80
+
+/* Intel(R) Active Management Technology signature */
+#define E1000_IAMT_SIGNATURE 0x544D4149
+
+#endif
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.c
new file mode 100644
index 00000000..3ef0d98b
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.c
@@ -0,0 +1,525 @@
+/*******************************************************************************
+
+ Intel(R) Gigabit Ethernet Linux driver
+ Copyright(c) 2007-2013 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#include "e1000_mbx.h"
+
+/**
+ * e1000_null_mbx_check_for_flag - No-op function, return 0
+ * @hw: pointer to the HW structure
+ **/
+static s32 e1000_null_mbx_check_for_flag(struct e1000_hw E1000_UNUSEDARG *hw,
+ u16 E1000_UNUSEDARG mbx_id)
+{
+ DEBUGFUNC("e1000_null_mbx_check_flag");
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_null_mbx_transact - No-op function, return 0
+ * @hw: pointer to the HW structure
+ **/
+static s32 e1000_null_mbx_transact(struct e1000_hw E1000_UNUSEDARG *hw,
+ u32 E1000_UNUSEDARG *msg,
+ u16 E1000_UNUSEDARG size,
+ u16 E1000_UNUSEDARG mbx_id)
+{
+ DEBUGFUNC("e1000_null_mbx_rw_msg");
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_read_mbx - Reads a message from the mailbox
+ * @hw: pointer to the HW structure
+ * @msg: The message buffer
+ * @size: Length of buffer
+ * @mbx_id: id of mailbox to read
+ *
+ * returns SUCCESS if it successfully read message from buffer
+ **/
+s32 e1000_read_mbx(struct e1000_hw *hw, u32 *msg, u16 size, u16 mbx_id)
+{
+ struct e1000_mbx_info *mbx = &hw->mbx;
+ s32 ret_val = -E1000_ERR_MBX;
+
+ DEBUGFUNC("e1000_read_mbx");
+
+ /* limit read to size of mailbox */
+ if (size > mbx->size)
+ size = mbx->size;
+
+ if (mbx->ops.read)
+ ret_val = mbx->ops.read(hw, msg, size, mbx_id);
+
+ return ret_val;
+}
+
+/**
+ * e1000_write_mbx - Write a message to the mailbox
+ * @hw: pointer to the HW structure
+ * @msg: The message buffer
+ * @size: Length of buffer
+ * @mbx_id: id of mailbox to write
+ *
+ * returns SUCCESS if it successfully copied message into the buffer
+ **/
+s32 e1000_write_mbx(struct e1000_hw *hw, u32 *msg, u16 size, u16 mbx_id)
+{
+ struct e1000_mbx_info *mbx = &hw->mbx;
+ s32 ret_val = E1000_SUCCESS;
+
+ DEBUGFUNC("e1000_write_mbx");
+
+ if (size > mbx->size)
+ ret_val = -E1000_ERR_MBX;
+
+ else if (mbx->ops.write)
+ ret_val = mbx->ops.write(hw, msg, size, mbx_id);
+
+ return ret_val;
+}
+
+/**
+ * e1000_check_for_msg - checks to see if someone sent us mail
+ * @hw: pointer to the HW structure
+ * @mbx_id: id of mailbox to check
+ *
+ * returns SUCCESS if the Status bit was found or else ERR_MBX
+ **/
+s32 e1000_check_for_msg(struct e1000_hw *hw, u16 mbx_id)
+{
+ struct e1000_mbx_info *mbx = &hw->mbx;
+ s32 ret_val = -E1000_ERR_MBX;
+
+ DEBUGFUNC("e1000_check_for_msg");
+
+ if (mbx->ops.check_for_msg)
+ ret_val = mbx->ops.check_for_msg(hw, mbx_id);
+
+ return ret_val;
+}
+
+/**
+ * e1000_check_for_ack - checks to see if someone sent us ACK
+ * @hw: pointer to the HW structure
+ * @mbx_id: id of mailbox to check
+ *
+ * returns SUCCESS if the Status bit was found or else ERR_MBX
+ **/
+s32 e1000_check_for_ack(struct e1000_hw *hw, u16 mbx_id)
+{
+ struct e1000_mbx_info *mbx = &hw->mbx;
+ s32 ret_val = -E1000_ERR_MBX;
+
+ DEBUGFUNC("e1000_check_for_ack");
+
+ if (mbx->ops.check_for_ack)
+ ret_val = mbx->ops.check_for_ack(hw, mbx_id);
+
+ return ret_val;
+}
+
+/**
+ * e1000_check_for_rst - checks to see if other side has reset
+ * @hw: pointer to the HW structure
+ * @mbx_id: id of mailbox to check
+ *
+ * returns SUCCESS if the Status bit was found or else ERR_MBX
+ **/
+s32 e1000_check_for_rst(struct e1000_hw *hw, u16 mbx_id)
+{
+ struct e1000_mbx_info *mbx = &hw->mbx;
+ s32 ret_val = -E1000_ERR_MBX;
+
+ DEBUGFUNC("e1000_check_for_rst");
+
+ if (mbx->ops.check_for_rst)
+ ret_val = mbx->ops.check_for_rst(hw, mbx_id);
+
+ return ret_val;
+}
+
+/**
+ * e1000_poll_for_msg - Wait for message notification
+ * @hw: pointer to the HW structure
+ * @mbx_id: id of mailbox to write
+ *
+ * returns SUCCESS if it successfully received a message notification
+ **/
+static s32 e1000_poll_for_msg(struct e1000_hw *hw, u16 mbx_id)
+{
+ struct e1000_mbx_info *mbx = &hw->mbx;
+ int countdown = mbx->timeout;
+
+ DEBUGFUNC("e1000_poll_for_msg");
+
+ if (!countdown || !mbx->ops.check_for_msg)
+ goto out;
+
+ while (countdown && mbx->ops.check_for_msg(hw, mbx_id)) {
+ countdown--;
+ if (!countdown)
+ break;
+ usec_delay(mbx->usec_delay);
+ }
+
+ /* if we failed, all future posted messages fail until reset */
+ if (!countdown)
+ mbx->timeout = 0;
+out:
+ return countdown ? E1000_SUCCESS : -E1000_ERR_MBX;
+}
+
+/**
+ * e1000_poll_for_ack - Wait for message acknowledgement
+ * @hw: pointer to the HW structure
+ * @mbx_id: id of mailbox to write
+ *
+ * returns SUCCESS if it successfully received a message acknowledgement
+ **/
+static s32 e1000_poll_for_ack(struct e1000_hw *hw, u16 mbx_id)
+{
+ struct e1000_mbx_info *mbx = &hw->mbx;
+ int countdown = mbx->timeout;
+
+ DEBUGFUNC("e1000_poll_for_ack");
+
+ if (!countdown || !mbx->ops.check_for_ack)
+ goto out;
+
+ while (countdown && mbx->ops.check_for_ack(hw, mbx_id)) {
+ countdown--;
+ if (!countdown)
+ break;
+ usec_delay(mbx->usec_delay);
+ }
+
+ /* if we failed, all future posted messages fail until reset */
+ if (!countdown)
+ mbx->timeout = 0;
+out:
+ return countdown ? E1000_SUCCESS : -E1000_ERR_MBX;
+}
+
+/**
+ * e1000_read_posted_mbx - Wait for message notification and receive message
+ * @hw: pointer to the HW structure
+ * @msg: The message buffer
+ * @size: Length of buffer
+ * @mbx_id: id of mailbox to write
+ *
+ * returns SUCCESS if it successfully received a message notification and
+ * copied it into the receive buffer.
+ **/
+s32 e1000_read_posted_mbx(struct e1000_hw *hw, u32 *msg, u16 size, u16 mbx_id)
+{
+ struct e1000_mbx_info *mbx = &hw->mbx;
+ s32 ret_val = -E1000_ERR_MBX;
+
+ DEBUGFUNC("e1000_read_posted_mbx");
+
+ if (!mbx->ops.read)
+ goto out;
+
+ ret_val = e1000_poll_for_msg(hw, mbx_id);
+
+ /* if ack received read message, otherwise we timed out */
+ if (!ret_val)
+ ret_val = mbx->ops.read(hw, msg, size, mbx_id);
+out:
+ return ret_val;
+}
+
+/**
+ * e1000_write_posted_mbx - Write a message to the mailbox, wait for ack
+ * @hw: pointer to the HW structure
+ * @msg: The message buffer
+ * @size: Length of buffer
+ * @mbx_id: id of mailbox to write
+ *
+ * returns SUCCESS if it successfully copied message into the buffer and
+ * received an ack to that message within delay * timeout period
+ **/
+s32 e1000_write_posted_mbx(struct e1000_hw *hw, u32 *msg, u16 size, u16 mbx_id)
+{
+ struct e1000_mbx_info *mbx = &hw->mbx;
+ s32 ret_val = -E1000_ERR_MBX;
+
+ DEBUGFUNC("e1000_write_posted_mbx");
+
+ /* exit if either we can't write or there isn't a defined timeout */
+ if (!mbx->ops.write || !mbx->timeout)
+ goto out;
+
+ /* send msg */
+ ret_val = mbx->ops.write(hw, msg, size, mbx_id);
+
+ /* if msg sent wait until we receive an ack */
+ if (!ret_val)
+ ret_val = e1000_poll_for_ack(hw, mbx_id);
+out:
+ return ret_val;
+}
+
+/**
+ * e1000_init_mbx_ops_generic - Initialize mbx function pointers
+ * @hw: pointer to the HW structure
+ *
+ * Sets the function pointers to no-op functions
+ **/
+void e1000_init_mbx_ops_generic(struct e1000_hw *hw)
+{
+ struct e1000_mbx_info *mbx = &hw->mbx;
+ mbx->ops.init_params = e1000_null_ops_generic;
+ mbx->ops.read = e1000_null_mbx_transact;
+ mbx->ops.write = e1000_null_mbx_transact;
+ mbx->ops.check_for_msg = e1000_null_mbx_check_for_flag;
+ mbx->ops.check_for_ack = e1000_null_mbx_check_for_flag;
+ mbx->ops.check_for_rst = e1000_null_mbx_check_for_flag;
+ mbx->ops.read_posted = e1000_read_posted_mbx;
+ mbx->ops.write_posted = e1000_write_posted_mbx;
+}
+
+static s32 e1000_check_for_bit_pf(struct e1000_hw *hw, u32 mask)
+{
+ u32 mbvficr = E1000_READ_REG(hw, E1000_MBVFICR);
+ s32 ret_val = -E1000_ERR_MBX;
+
+ if (mbvficr & mask) {
+ ret_val = E1000_SUCCESS;
+ E1000_WRITE_REG(hw, E1000_MBVFICR, mask);
+ }
+
+ return ret_val;
+}
+
+/**
+ * e1000_check_for_msg_pf - checks to see if the VF has sent mail
+ * @hw: pointer to the HW structure
+ * @vf_number: the VF index
+ *
+ * returns SUCCESS if the VF has set the Status bit or else ERR_MBX
+ **/
+static s32 e1000_check_for_msg_pf(struct e1000_hw *hw, u16 vf_number)
+{
+ s32 ret_val = -E1000_ERR_MBX;
+
+ DEBUGFUNC("e1000_check_for_msg_pf");
+
+ if (!e1000_check_for_bit_pf(hw, E1000_MBVFICR_VFREQ_VF1 << vf_number)) {
+ ret_val = E1000_SUCCESS;
+ hw->mbx.stats.reqs++;
+ }
+
+ return ret_val;
+}
+
+/**
+ * e1000_check_for_ack_pf - checks to see if the VF has ACKed
+ * @hw: pointer to the HW structure
+ * @vf_number: the VF index
+ *
+ * returns SUCCESS if the VF has set the Status bit or else ERR_MBX
+ **/
+static s32 e1000_check_for_ack_pf(struct e1000_hw *hw, u16 vf_number)
+{
+ s32 ret_val = -E1000_ERR_MBX;
+
+ DEBUGFUNC("e1000_check_for_ack_pf");
+
+ if (!e1000_check_for_bit_pf(hw, E1000_MBVFICR_VFACK_VF1 << vf_number)) {
+ ret_val = E1000_SUCCESS;
+ hw->mbx.stats.acks++;
+ }
+
+ return ret_val;
+}
+
+/**
+ * e1000_check_for_rst_pf - checks to see if the VF has reset
+ * @hw: pointer to the HW structure
+ * @vf_number: the VF index
+ *
+ * returns SUCCESS if the VF has set the Status bit or else ERR_MBX
+ **/
+static s32 e1000_check_for_rst_pf(struct e1000_hw *hw, u16 vf_number)
+{
+ u32 vflre = E1000_READ_REG(hw, E1000_VFLRE);
+ s32 ret_val = -E1000_ERR_MBX;
+
+ DEBUGFUNC("e1000_check_for_rst_pf");
+
+ if (vflre & (1 << vf_number)) {
+ ret_val = E1000_SUCCESS;
+ E1000_WRITE_REG(hw, E1000_VFLRE, (1 << vf_number));
+ hw->mbx.stats.rsts++;
+ }
+
+ return ret_val;
+}
+
+/**
+ * e1000_obtain_mbx_lock_pf - obtain mailbox lock
+ * @hw: pointer to the HW structure
+ * @vf_number: the VF index
+ *
+ * return SUCCESS if we obtained the mailbox lock
+ **/
+static s32 e1000_obtain_mbx_lock_pf(struct e1000_hw *hw, u16 vf_number)
+{
+ s32 ret_val = -E1000_ERR_MBX;
+ u32 p2v_mailbox;
+
+ DEBUGFUNC("e1000_obtain_mbx_lock_pf");
+
+ /* Take ownership of the buffer */
+ E1000_WRITE_REG(hw, E1000_P2VMAILBOX(vf_number), E1000_P2VMAILBOX_PFU);
+
+ /* reserve mailbox for vf use */
+ p2v_mailbox = E1000_READ_REG(hw, E1000_P2VMAILBOX(vf_number));
+ if (p2v_mailbox & E1000_P2VMAILBOX_PFU)
+ ret_val = E1000_SUCCESS;
+
+ return ret_val;
+}
+
+/**
+ * e1000_write_mbx_pf - Places a message in the mailbox
+ * @hw: pointer to the HW structure
+ * @msg: The message buffer
+ * @size: Length of buffer
+ * @vf_number: the VF index
+ *
+ * returns SUCCESS if it successfully copied message into the buffer
+ **/
+static s32 e1000_write_mbx_pf(struct e1000_hw *hw, u32 *msg, u16 size,
+ u16 vf_number)
+{
+ s32 ret_val;
+ u16 i;
+
+ DEBUGFUNC("e1000_write_mbx_pf");
+
+ /* lock the mailbox to prevent pf/vf race condition */
+ ret_val = e1000_obtain_mbx_lock_pf(hw, vf_number);
+ if (ret_val)
+ goto out_no_write;
+
+ /* flush msg and acks as we are overwriting the message buffer */
+ e1000_check_for_msg_pf(hw, vf_number);
+ e1000_check_for_ack_pf(hw, vf_number);
+
+ /* copy the caller specified message to the mailbox memory buffer */
+ for (i = 0; i < size; i++)
+ E1000_WRITE_REG_ARRAY(hw, E1000_VMBMEM(vf_number), i, msg[i]);
+
+ /* Interrupt VF to tell it a message has been sent and release buffer*/
+ E1000_WRITE_REG(hw, E1000_P2VMAILBOX(vf_number), E1000_P2VMAILBOX_STS);
+
+ /* update stats */
+ hw->mbx.stats.msgs_tx++;
+
+out_no_write:
+ return ret_val;
+
+}
+
+/**
+ * e1000_read_mbx_pf - Read a message from the mailbox
+ * @hw: pointer to the HW structure
+ * @msg: The message buffer
+ * @size: Length of buffer
+ * @vf_number: the VF index
+ *
+ * This function copies a message from the mailbox buffer to the caller's
+ * memory buffer. The presumption is that the caller knows that there was
+ * a message due to a VF request so no polling for message is needed.
+ **/
+static s32 e1000_read_mbx_pf(struct e1000_hw *hw, u32 *msg, u16 size,
+ u16 vf_number)
+{
+ s32 ret_val;
+ u16 i;
+
+ DEBUGFUNC("e1000_read_mbx_pf");
+
+ /* lock the mailbox to prevent pf/vf race condition */
+ ret_val = e1000_obtain_mbx_lock_pf(hw, vf_number);
+ if (ret_val)
+ goto out_no_read;
+
+ /* copy the message to the mailbox memory buffer */
+ for (i = 0; i < size; i++)
+ msg[i] = E1000_READ_REG_ARRAY(hw, E1000_VMBMEM(vf_number), i);
+
+ /* Acknowledge the message and release buffer */
+ E1000_WRITE_REG(hw, E1000_P2VMAILBOX(vf_number), E1000_P2VMAILBOX_ACK);
+
+ /* update stats */
+ hw->mbx.stats.msgs_rx++;
+
+out_no_read:
+ return ret_val;
+}
+
+/**
+ * e1000_init_mbx_params_pf - set initial values for pf mailbox
+ * @hw: pointer to the HW structure
+ *
+ * Initializes the hw->mbx struct to correct values for pf mailbox
+ */
+s32 e1000_init_mbx_params_pf(struct e1000_hw *hw)
+{
+ struct e1000_mbx_info *mbx = &hw->mbx;
+
+ switch (hw->mac.type) {
+ case e1000_82576:
+ case e1000_i350:
+ case e1000_i354:
+ mbx->timeout = 0;
+ mbx->usec_delay = 0;
+
+ mbx->size = E1000_VFMAILBOX_SIZE;
+
+ mbx->ops.read = e1000_read_mbx_pf;
+ mbx->ops.write = e1000_write_mbx_pf;
+ mbx->ops.read_posted = e1000_read_posted_mbx;
+ mbx->ops.write_posted = e1000_write_posted_mbx;
+ mbx->ops.check_for_msg = e1000_check_for_msg_pf;
+ mbx->ops.check_for_ack = e1000_check_for_ack_pf;
+ mbx->ops.check_for_rst = e1000_check_for_rst_pf;
+
+ mbx->stats.msgs_tx = 0;
+ mbx->stats.msgs_rx = 0;
+ mbx->stats.reqs = 0;
+ mbx->stats.acks = 0;
+ mbx->stats.rsts = 0;
+ default:
+ return E1000_SUCCESS;
+ }
+}
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.h
new file mode 100644
index 00000000..bbf838c8
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.h
@@ -0,0 +1,87 @@
+/*******************************************************************************
+
+ Intel(R) Gigabit Ethernet Linux driver
+ Copyright(c) 2007-2013 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _E1000_MBX_H_
+#define _E1000_MBX_H_
+
+#include "e1000_api.h"
+
+#define E1000_P2VMAILBOX_STS 0x00000001 /* Initiate message send to VF */
+#define E1000_P2VMAILBOX_ACK 0x00000002 /* Ack message recv'd from VF */
+#define E1000_P2VMAILBOX_VFU 0x00000004 /* VF owns the mailbox buffer */
+#define E1000_P2VMAILBOX_PFU 0x00000008 /* PF owns the mailbox buffer */
+#define E1000_P2VMAILBOX_RVFU 0x00000010 /* Reset VFU - used when VF stuck */
+
+#define E1000_MBVFICR_VFREQ_MASK 0x000000FF /* bits for VF messages */
+#define E1000_MBVFICR_VFREQ_VF1 0x00000001 /* bit for VF 1 message */
+#define E1000_MBVFICR_VFACK_MASK 0x00FF0000 /* bits for VF acks */
+#define E1000_MBVFICR_VFACK_VF1 0x00010000 /* bit for VF 1 ack */
+
+#define E1000_VFMAILBOX_SIZE 16 /* 16 32 bit words - 64 bytes */
+
+/* If it's a E1000_VF_* msg then it originates in the VF and is sent to the
+ * PF. The reverse is true if it is E1000_PF_*.
+ * Message ACK's are the value or'd with 0xF0000000
+ */
+/* Msgs below or'd with this are the ACK */
+#define E1000_VT_MSGTYPE_ACK 0x80000000
+/* Msgs below or'd with this are the NACK */
+#define E1000_VT_MSGTYPE_NACK 0x40000000
+/* Indicates that VF is still clear to send requests */
+#define E1000_VT_MSGTYPE_CTS 0x20000000
+#define E1000_VT_MSGINFO_SHIFT 16
+/* bits 23:16 are used for extra info for certain messages */
+#define E1000_VT_MSGINFO_MASK (0xFF << E1000_VT_MSGINFO_SHIFT)
+
+#define E1000_VF_RESET 0x01 /* VF requests reset */
+#define E1000_VF_SET_MAC_ADDR 0x02 /* VF requests to set MAC addr */
+#define E1000_VF_SET_MULTICAST 0x03 /* VF requests to set MC addr */
+#define E1000_VF_SET_MULTICAST_COUNT_MASK (0x1F << E1000_VT_MSGINFO_SHIFT)
+#define E1000_VF_SET_MULTICAST_OVERFLOW (0x80 << E1000_VT_MSGINFO_SHIFT)
+#define E1000_VF_SET_VLAN 0x04 /* VF requests to set VLAN */
+#define E1000_VF_SET_VLAN_ADD (0x01 << E1000_VT_MSGINFO_SHIFT)
+#define E1000_VF_SET_LPE 0x05 /* reqs to set VMOLR.LPE */
+#define E1000_VF_SET_PROMISC 0x06 /* reqs to clear VMOLR.ROPE/MPME*/
+#define E1000_VF_SET_PROMISC_UNICAST (0x01 << E1000_VT_MSGINFO_SHIFT)
+#define E1000_VF_SET_PROMISC_MULTICAST (0x02 << E1000_VT_MSGINFO_SHIFT)
+
+#define E1000_PF_CONTROL_MSG 0x0100 /* PF control message */
+
+#define E1000_VF_MBX_INIT_TIMEOUT 2000 /* number of retries on mailbox */
+#define E1000_VF_MBX_INIT_DELAY 500 /* microseconds between retries */
+
+s32 e1000_read_mbx(struct e1000_hw *, u32 *, u16, u16);
+s32 e1000_write_mbx(struct e1000_hw *, u32 *, u16, u16);
+s32 e1000_read_posted_mbx(struct e1000_hw *, u32 *, u16, u16);
+s32 e1000_write_posted_mbx(struct e1000_hw *, u32 *, u16, u16);
+s32 e1000_check_for_msg(struct e1000_hw *, u16);
+s32 e1000_check_for_ack(struct e1000_hw *, u16);
+s32 e1000_check_for_rst(struct e1000_hw *, u16);
+void e1000_init_mbx_ops_generic(struct e1000_hw *hw);
+s32 e1000_init_mbx_params_pf(struct e1000_hw *);
+
+#endif /* _E1000_MBX_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.c
new file mode 100644
index 00000000..6188d007
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.c
@@ -0,0 +1,965 @@
+/*******************************************************************************
+
+ Intel(R) Gigabit Ethernet Linux driver
+ Copyright(c) 2007-2013 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#include "e1000_api.h"
+
+static void e1000_reload_nvm_generic(struct e1000_hw *hw);
+
+/**
+ * e1000_init_nvm_ops_generic - Initialize NVM function pointers
+ * @hw: pointer to the HW structure
+ *
+ * Setups up the function pointers to no-op functions
+ **/
+void e1000_init_nvm_ops_generic(struct e1000_hw *hw)
+{
+ struct e1000_nvm_info *nvm = &hw->nvm;
+ DEBUGFUNC("e1000_init_nvm_ops_generic");
+
+ /* Initialize function pointers */
+ nvm->ops.init_params = e1000_null_ops_generic;
+ nvm->ops.acquire = e1000_null_ops_generic;
+ nvm->ops.read = e1000_null_read_nvm;
+ nvm->ops.release = e1000_null_nvm_generic;
+ nvm->ops.reload = e1000_reload_nvm_generic;
+ nvm->ops.update = e1000_null_ops_generic;
+ nvm->ops.valid_led_default = e1000_null_led_default;
+ nvm->ops.validate = e1000_null_ops_generic;
+ nvm->ops.write = e1000_null_write_nvm;
+}
+
+/**
+ * e1000_null_nvm_read - No-op function, return 0
+ * @hw: pointer to the HW structure
+ **/
+s32 e1000_null_read_nvm(struct e1000_hw E1000_UNUSEDARG *hw,
+ u16 E1000_UNUSEDARG a, u16 E1000_UNUSEDARG b,
+ u16 E1000_UNUSEDARG *c)
+{
+ DEBUGFUNC("e1000_null_read_nvm");
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_null_nvm_generic - No-op function, return void
+ * @hw: pointer to the HW structure
+ **/
+void e1000_null_nvm_generic(struct e1000_hw E1000_UNUSEDARG *hw)
+{
+ DEBUGFUNC("e1000_null_nvm_generic");
+ return;
+}
+
+/**
+ * e1000_null_led_default - No-op function, return 0
+ * @hw: pointer to the HW structure
+ **/
+s32 e1000_null_led_default(struct e1000_hw E1000_UNUSEDARG *hw,
+ u16 E1000_UNUSEDARG *data)
+{
+ DEBUGFUNC("e1000_null_led_default");
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_null_write_nvm - No-op function, return 0
+ * @hw: pointer to the HW structure
+ **/
+s32 e1000_null_write_nvm(struct e1000_hw E1000_UNUSEDARG *hw,
+ u16 E1000_UNUSEDARG a, u16 E1000_UNUSEDARG b,
+ u16 E1000_UNUSEDARG *c)
+{
+ DEBUGFUNC("e1000_null_write_nvm");
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_raise_eec_clk - Raise EEPROM clock
+ * @hw: pointer to the HW structure
+ * @eecd: pointer to the EEPROM
+ *
+ * Enable/Raise the EEPROM clock bit.
+ **/
+static void e1000_raise_eec_clk(struct e1000_hw *hw, u32 *eecd)
+{
+ *eecd = *eecd | E1000_EECD_SK;
+ E1000_WRITE_REG(hw, E1000_EECD, *eecd);
+ E1000_WRITE_FLUSH(hw);
+ usec_delay(hw->nvm.delay_usec);
+}
+
+/**
+ * e1000_lower_eec_clk - Lower EEPROM clock
+ * @hw: pointer to the HW structure
+ * @eecd: pointer to the EEPROM
+ *
+ * Clear/Lower the EEPROM clock bit.
+ **/
+static void e1000_lower_eec_clk(struct e1000_hw *hw, u32 *eecd)
+{
+ *eecd = *eecd & ~E1000_EECD_SK;
+ E1000_WRITE_REG(hw, E1000_EECD, *eecd);
+ E1000_WRITE_FLUSH(hw);
+ usec_delay(hw->nvm.delay_usec);
+}
+
+/**
+ * e1000_shift_out_eec_bits - Shift data bits our to the EEPROM
+ * @hw: pointer to the HW structure
+ * @data: data to send to the EEPROM
+ * @count: number of bits to shift out
+ *
+ * We need to shift 'count' bits out to the EEPROM. So, the value in the
+ * "data" parameter will be shifted out to the EEPROM one bit at a time.
+ * In order to do this, "data" must be broken down into bits.
+ **/
+static void e1000_shift_out_eec_bits(struct e1000_hw *hw, u16 data, u16 count)
+{
+ struct e1000_nvm_info *nvm = &hw->nvm;
+ u32 eecd = E1000_READ_REG(hw, E1000_EECD);
+ u32 mask;
+
+ DEBUGFUNC("e1000_shift_out_eec_bits");
+
+ mask = 0x01 << (count - 1);
+ if (nvm->type == e1000_nvm_eeprom_spi)
+ eecd |= E1000_EECD_DO;
+
+ do {
+ eecd &= ~E1000_EECD_DI;
+
+ if (data & mask)
+ eecd |= E1000_EECD_DI;
+
+ E1000_WRITE_REG(hw, E1000_EECD, eecd);
+ E1000_WRITE_FLUSH(hw);
+
+ usec_delay(nvm->delay_usec);
+
+ e1000_raise_eec_clk(hw, &eecd);
+ e1000_lower_eec_clk(hw, &eecd);
+
+ mask >>= 1;
+ } while (mask);
+
+ eecd &= ~E1000_EECD_DI;
+ E1000_WRITE_REG(hw, E1000_EECD, eecd);
+}
+
+/**
+ * e1000_shift_in_eec_bits - Shift data bits in from the EEPROM
+ * @hw: pointer to the HW structure
+ * @count: number of bits to shift in
+ *
+ * In order to read a register from the EEPROM, we need to shift 'count' bits
+ * in from the EEPROM. Bits are "shifted in" by raising the clock input to
+ * the EEPROM (setting the SK bit), and then reading the value of the data out
+ * "DO" bit. During this "shifting in" process the data in "DI" bit should
+ * always be clear.
+ **/
+static u16 e1000_shift_in_eec_bits(struct e1000_hw *hw, u16 count)
+{
+ u32 eecd;
+ u32 i;
+ u16 data;
+
+ DEBUGFUNC("e1000_shift_in_eec_bits");
+
+ eecd = E1000_READ_REG(hw, E1000_EECD);
+
+ eecd &= ~(E1000_EECD_DO | E1000_EECD_DI);
+ data = 0;
+
+ for (i = 0; i < count; i++) {
+ data <<= 1;
+ e1000_raise_eec_clk(hw, &eecd);
+
+ eecd = E1000_READ_REG(hw, E1000_EECD);
+
+ eecd &= ~E1000_EECD_DI;
+ if (eecd & E1000_EECD_DO)
+ data |= 1;
+
+ e1000_lower_eec_clk(hw, &eecd);
+ }
+
+ return data;
+}
+
+/**
+ * e1000_poll_eerd_eewr_done - Poll for EEPROM read/write completion
+ * @hw: pointer to the HW structure
+ * @ee_reg: EEPROM flag for polling
+ *
+ * Polls the EEPROM status bit for either read or write completion based
+ * upon the value of 'ee_reg'.
+ **/
+s32 e1000_poll_eerd_eewr_done(struct e1000_hw *hw, int ee_reg)
+{
+ u32 attempts = 100000;
+ u32 i, reg = 0;
+
+ DEBUGFUNC("e1000_poll_eerd_eewr_done");
+
+ for (i = 0; i < attempts; i++) {
+ if (ee_reg == E1000_NVM_POLL_READ)
+ reg = E1000_READ_REG(hw, E1000_EERD);
+ else
+ reg = E1000_READ_REG(hw, E1000_EEWR);
+
+ if (reg & E1000_NVM_RW_REG_DONE)
+ return E1000_SUCCESS;
+
+ usec_delay(5);
+ }
+
+ return -E1000_ERR_NVM;
+}
+
+/**
+ * e1000_acquire_nvm_generic - Generic request for access to EEPROM
+ * @hw: pointer to the HW structure
+ *
+ * Set the EEPROM access request bit and wait for EEPROM access grant bit.
+ * Return successful if access grant bit set, else clear the request for
+ * EEPROM access and return -E1000_ERR_NVM (-1).
+ **/
+s32 e1000_acquire_nvm_generic(struct e1000_hw *hw)
+{
+ u32 eecd = E1000_READ_REG(hw, E1000_EECD);
+ s32 timeout = E1000_NVM_GRANT_ATTEMPTS;
+
+ DEBUGFUNC("e1000_acquire_nvm_generic");
+
+ E1000_WRITE_REG(hw, E1000_EECD, eecd | E1000_EECD_REQ);
+ eecd = E1000_READ_REG(hw, E1000_EECD);
+
+ while (timeout) {
+ if (eecd & E1000_EECD_GNT)
+ break;
+ usec_delay(5);
+ eecd = E1000_READ_REG(hw, E1000_EECD);
+ timeout--;
+ }
+
+ if (!timeout) {
+ eecd &= ~E1000_EECD_REQ;
+ E1000_WRITE_REG(hw, E1000_EECD, eecd);
+ DEBUGOUT("Could not acquire NVM grant\n");
+ return -E1000_ERR_NVM;
+ }
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_standby_nvm - Return EEPROM to standby state
+ * @hw: pointer to the HW structure
+ *
+ * Return the EEPROM to a standby state.
+ **/
+static void e1000_standby_nvm(struct e1000_hw *hw)
+{
+ struct e1000_nvm_info *nvm = &hw->nvm;
+ u32 eecd = E1000_READ_REG(hw, E1000_EECD);
+
+ DEBUGFUNC("e1000_standby_nvm");
+
+ if (nvm->type == e1000_nvm_eeprom_spi) {
+ /* Toggle CS to flush commands */
+ eecd |= E1000_EECD_CS;
+ E1000_WRITE_REG(hw, E1000_EECD, eecd);
+ E1000_WRITE_FLUSH(hw);
+ usec_delay(nvm->delay_usec);
+ eecd &= ~E1000_EECD_CS;
+ E1000_WRITE_REG(hw, E1000_EECD, eecd);
+ E1000_WRITE_FLUSH(hw);
+ usec_delay(nvm->delay_usec);
+ }
+}
+
+/**
+ * e1000_stop_nvm - Terminate EEPROM command
+ * @hw: pointer to the HW structure
+ *
+ * Terminates the current command by inverting the EEPROM's chip select pin.
+ **/
+static void e1000_stop_nvm(struct e1000_hw *hw)
+{
+ u32 eecd;
+
+ DEBUGFUNC("e1000_stop_nvm");
+
+ eecd = E1000_READ_REG(hw, E1000_EECD);
+ if (hw->nvm.type == e1000_nvm_eeprom_spi) {
+ /* Pull CS high */
+ eecd |= E1000_EECD_CS;
+ e1000_lower_eec_clk(hw, &eecd);
+ }
+}
+
+/**
+ * e1000_release_nvm_generic - Release exclusive access to EEPROM
+ * @hw: pointer to the HW structure
+ *
+ * Stop any current commands to the EEPROM and clear the EEPROM request bit.
+ **/
+void e1000_release_nvm_generic(struct e1000_hw *hw)
+{
+ u32 eecd;
+
+ DEBUGFUNC("e1000_release_nvm_generic");
+
+ e1000_stop_nvm(hw);
+
+ eecd = E1000_READ_REG(hw, E1000_EECD);
+ eecd &= ~E1000_EECD_REQ;
+ E1000_WRITE_REG(hw, E1000_EECD, eecd);
+}
+
+/**
+ * e1000_ready_nvm_eeprom - Prepares EEPROM for read/write
+ * @hw: pointer to the HW structure
+ *
+ * Setups the EEPROM for reading and writing.
+ **/
+static s32 e1000_ready_nvm_eeprom(struct e1000_hw *hw)
+{
+ struct e1000_nvm_info *nvm = &hw->nvm;
+ u32 eecd = E1000_READ_REG(hw, E1000_EECD);
+ u8 spi_stat_reg;
+
+ DEBUGFUNC("e1000_ready_nvm_eeprom");
+
+ if (nvm->type == e1000_nvm_eeprom_spi) {
+ u16 timeout = NVM_MAX_RETRY_SPI;
+
+ /* Clear SK and CS */
+ eecd &= ~(E1000_EECD_CS | E1000_EECD_SK);
+ E1000_WRITE_REG(hw, E1000_EECD, eecd);
+ E1000_WRITE_FLUSH(hw);
+ usec_delay(1);
+
+ /* Read "Status Register" repeatedly until the LSB is cleared.
+ * The EEPROM will signal that the command has been completed
+ * by clearing bit 0 of the internal status register. If it's
+ * not cleared within 'timeout', then error out.
+ */
+ while (timeout) {
+ e1000_shift_out_eec_bits(hw, NVM_RDSR_OPCODE_SPI,
+ hw->nvm.opcode_bits);
+ spi_stat_reg = (u8)e1000_shift_in_eec_bits(hw, 8);
+ if (!(spi_stat_reg & NVM_STATUS_RDY_SPI))
+ break;
+
+ usec_delay(5);
+ e1000_standby_nvm(hw);
+ timeout--;
+ }
+
+ if (!timeout) {
+ DEBUGOUT("SPI NVM Status error\n");
+ return -E1000_ERR_NVM;
+ }
+ }
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_read_nvm_spi - Read EEPROM's using SPI
+ * @hw: pointer to the HW structure
+ * @offset: offset of word in the EEPROM to read
+ * @words: number of words to read
+ * @data: word read from the EEPROM
+ *
+ * Reads a 16 bit word from the EEPROM.
+ **/
+s32 e1000_read_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data)
+{
+ struct e1000_nvm_info *nvm = &hw->nvm;
+ u32 i = 0;
+ s32 ret_val;
+ u16 word_in;
+ u8 read_opcode = NVM_READ_OPCODE_SPI;
+
+ DEBUGFUNC("e1000_read_nvm_spi");
+
+ /* A check for invalid values: offset too large, too many words,
+ * and not enough words.
+ */
+ if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) ||
+ (words == 0)) {
+ DEBUGOUT("nvm parameter(s) out of bounds\n");
+ return -E1000_ERR_NVM;
+ }
+
+ ret_val = nvm->ops.acquire(hw);
+ if (ret_val)
+ return ret_val;
+
+ ret_val = e1000_ready_nvm_eeprom(hw);
+ if (ret_val)
+ goto release;
+
+ e1000_standby_nvm(hw);
+
+ if ((nvm->address_bits == 8) && (offset >= 128))
+ read_opcode |= NVM_A8_OPCODE_SPI;
+
+ /* Send the READ command (opcode + addr) */
+ e1000_shift_out_eec_bits(hw, read_opcode, nvm->opcode_bits);
+ e1000_shift_out_eec_bits(hw, (u16)(offset*2), nvm->address_bits);
+
+ /* Read the data. SPI NVMs increment the address with each byte
+ * read and will roll over if reading beyond the end. This allows
+ * us to read the whole NVM from any offset
+ */
+ for (i = 0; i < words; i++) {
+ word_in = e1000_shift_in_eec_bits(hw, 16);
+ data[i] = (word_in >> 8) | (word_in << 8);
+ }
+
+release:
+ nvm->ops.release(hw);
+
+ return ret_val;
+}
+
+/**
+ * e1000_read_nvm_eerd - Reads EEPROM using EERD register
+ * @hw: pointer to the HW structure
+ * @offset: offset of word in the EEPROM to read
+ * @words: number of words to read
+ * @data: word read from the EEPROM
+ *
+ * Reads a 16 bit word from the EEPROM using the EERD register.
+ **/
+s32 e1000_read_nvm_eerd(struct e1000_hw *hw, u16 offset, u16 words, u16 *data)
+{
+ struct e1000_nvm_info *nvm = &hw->nvm;
+ u32 i, eerd = 0;
+ s32 ret_val = E1000_SUCCESS;
+
+ DEBUGFUNC("e1000_read_nvm_eerd");
+
+ /* A check for invalid values: offset too large, too many words,
+ * too many words for the offset, and not enough words.
+ */
+ if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) ||
+ (words == 0)) {
+ DEBUGOUT("nvm parameter(s) out of bounds\n");
+ return -E1000_ERR_NVM;
+ }
+
+ for (i = 0; i < words; i++) {
+ eerd = ((offset+i) << E1000_NVM_RW_ADDR_SHIFT) +
+ E1000_NVM_RW_REG_START;
+
+ E1000_WRITE_REG(hw, E1000_EERD, eerd);
+ ret_val = e1000_poll_eerd_eewr_done(hw, E1000_NVM_POLL_READ);
+ if (ret_val)
+ break;
+
+ data[i] = (E1000_READ_REG(hw, E1000_EERD) >>
+ E1000_NVM_RW_REG_DATA);
+ }
+
+ return ret_val;
+}
+
+/**
+ * e1000_write_nvm_spi - Write to EEPROM using SPI
+ * @hw: pointer to the HW structure
+ * @offset: offset within the EEPROM to be written to
+ * @words: number of words to write
+ * @data: 16 bit word(s) to be written to the EEPROM
+ *
+ * Writes data to EEPROM at offset using SPI interface.
+ *
+ * If e1000_update_nvm_checksum is not called after this function , the
+ * EEPROM will most likely contain an invalid checksum.
+ **/
+s32 e1000_write_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data)
+{
+ struct e1000_nvm_info *nvm = &hw->nvm;
+ s32 ret_val = -E1000_ERR_NVM;
+ u16 widx = 0;
+
+ DEBUGFUNC("e1000_write_nvm_spi");
+
+ /* A check for invalid values: offset too large, too many words,
+ * and not enough words.
+ */
+ if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) ||
+ (words == 0)) {
+ DEBUGOUT("nvm parameter(s) out of bounds\n");
+ return -E1000_ERR_NVM;
+ }
+
+ while (widx < words) {
+ u8 write_opcode = NVM_WRITE_OPCODE_SPI;
+
+ ret_val = nvm->ops.acquire(hw);
+ if (ret_val)
+ return ret_val;
+
+ ret_val = e1000_ready_nvm_eeprom(hw);
+ if (ret_val) {
+ nvm->ops.release(hw);
+ return ret_val;
+ }
+
+ e1000_standby_nvm(hw);
+
+ /* Send the WRITE ENABLE command (8 bit opcode) */
+ e1000_shift_out_eec_bits(hw, NVM_WREN_OPCODE_SPI,
+ nvm->opcode_bits);
+
+ e1000_standby_nvm(hw);
+
+ /* Some SPI eeproms use the 8th address bit embedded in the
+ * opcode
+ */
+ if ((nvm->address_bits == 8) && (offset >= 128))
+ write_opcode |= NVM_A8_OPCODE_SPI;
+
+ /* Send the Write command (8-bit opcode + addr) */
+ e1000_shift_out_eec_bits(hw, write_opcode, nvm->opcode_bits);
+ e1000_shift_out_eec_bits(hw, (u16)((offset + widx) * 2),
+ nvm->address_bits);
+
+ /* Loop to allow for up to whole page write of eeprom */
+ while (widx < words) {
+ u16 word_out = data[widx];
+ word_out = (word_out >> 8) | (word_out << 8);
+ e1000_shift_out_eec_bits(hw, word_out, 16);
+ widx++;
+
+ if ((((offset + widx) * 2) % nvm->page_size) == 0) {
+ e1000_standby_nvm(hw);
+ break;
+ }
+ }
+ msec_delay(10);
+ nvm->ops.release(hw);
+ }
+
+ return ret_val;
+}
+
+/**
+ * e1000_read_pba_string_generic - Read device part number
+ * @hw: pointer to the HW structure
+ * @pba_num: pointer to device part number
+ * @pba_num_size: size of part number buffer
+ *
+ * Reads the product board assembly (PBA) number from the EEPROM and stores
+ * the value in pba_num.
+ **/
+s32 e1000_read_pba_string_generic(struct e1000_hw *hw, u8 *pba_num,
+ u32 pba_num_size)
+{
+ s32 ret_val;
+ u16 nvm_data;
+ u16 pba_ptr;
+ u16 offset;
+ u16 length;
+
+ DEBUGFUNC("e1000_read_pba_string_generic");
+
+ if (pba_num == NULL) {
+ DEBUGOUT("PBA string buffer was null\n");
+ return -E1000_ERR_INVALID_ARGUMENT;
+ }
+
+ ret_val = hw->nvm.ops.read(hw, NVM_PBA_OFFSET_0, 1, &nvm_data);
+ if (ret_val) {
+ DEBUGOUT("NVM Read Error\n");
+ return ret_val;
+ }
+
+ ret_val = hw->nvm.ops.read(hw, NVM_PBA_OFFSET_1, 1, &pba_ptr);
+ if (ret_val) {
+ DEBUGOUT("NVM Read Error\n");
+ return ret_val;
+ }
+
+ /* if nvm_data is not ptr guard the PBA must be in legacy format which
+ * means pba_ptr is actually our second data word for the PBA number
+ * and we can decode it into an ascii string
+ */
+ if (nvm_data != NVM_PBA_PTR_GUARD) {
+ DEBUGOUT("NVM PBA number is not stored as string\n");
+
+ /* make sure callers buffer is big enough to store the PBA */
+ if (pba_num_size < E1000_PBANUM_LENGTH) {
+ DEBUGOUT("PBA string buffer too small\n");
+ return E1000_ERR_NO_SPACE;
+ }
+
+ /* extract hex string from data and pba_ptr */
+ pba_num[0] = (nvm_data >> 12) & 0xF;
+ pba_num[1] = (nvm_data >> 8) & 0xF;
+ pba_num[2] = (nvm_data >> 4) & 0xF;
+ pba_num[3] = nvm_data & 0xF;
+ pba_num[4] = (pba_ptr >> 12) & 0xF;
+ pba_num[5] = (pba_ptr >> 8) & 0xF;
+ pba_num[6] = '-';
+ pba_num[7] = 0;
+ pba_num[8] = (pba_ptr >> 4) & 0xF;
+ pba_num[9] = pba_ptr & 0xF;
+
+ /* put a null character on the end of our string */
+ pba_num[10] = '\0';
+
+ /* switch all the data but the '-' to hex char */
+ for (offset = 0; offset < 10; offset++) {
+ if (pba_num[offset] < 0xA)
+ pba_num[offset] += '0';
+ else if (pba_num[offset] < 0x10)
+ pba_num[offset] += 'A' - 0xA;
+ }
+
+ return E1000_SUCCESS;
+ }
+
+ ret_val = hw->nvm.ops.read(hw, pba_ptr, 1, &length);
+ if (ret_val) {
+ DEBUGOUT("NVM Read Error\n");
+ return ret_val;
+ }
+
+ if (length == 0xFFFF || length == 0) {
+ DEBUGOUT("NVM PBA number section invalid length\n");
+ return -E1000_ERR_NVM_PBA_SECTION;
+ }
+ /* check if pba_num buffer is big enough */
+ if (pba_num_size < (((u32)length * 2) - 1)) {
+ DEBUGOUT("PBA string buffer too small\n");
+ return -E1000_ERR_NO_SPACE;
+ }
+
+ /* trim pba length from start of string */
+ pba_ptr++;
+ length--;
+
+ for (offset = 0; offset < length; offset++) {
+ ret_val = hw->nvm.ops.read(hw, pba_ptr + offset, 1, &nvm_data);
+ if (ret_val) {
+ DEBUGOUT("NVM Read Error\n");
+ return ret_val;
+ }
+ pba_num[offset * 2] = (u8)(nvm_data >> 8);
+ pba_num[(offset * 2) + 1] = (u8)(nvm_data & 0xFF);
+ }
+ pba_num[offset * 2] = '\0';
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_read_pba_length_generic - Read device part number length
+ * @hw: pointer to the HW structure
+ * @pba_num_size: size of part number buffer
+ *
+ * Reads the product board assembly (PBA) number length from the EEPROM and
+ * stores the value in pba_num_size.
+ **/
+s32 e1000_read_pba_length_generic(struct e1000_hw *hw, u32 *pba_num_size)
+{
+ s32 ret_val;
+ u16 nvm_data;
+ u16 pba_ptr;
+ u16 length;
+
+ DEBUGFUNC("e1000_read_pba_length_generic");
+
+ if (pba_num_size == NULL) {
+ DEBUGOUT("PBA buffer size was null\n");
+ return -E1000_ERR_INVALID_ARGUMENT;
+ }
+
+ ret_val = hw->nvm.ops.read(hw, NVM_PBA_OFFSET_0, 1, &nvm_data);
+ if (ret_val) {
+ DEBUGOUT("NVM Read Error\n");
+ return ret_val;
+ }
+
+ ret_val = hw->nvm.ops.read(hw, NVM_PBA_OFFSET_1, 1, &pba_ptr);
+ if (ret_val) {
+ DEBUGOUT("NVM Read Error\n");
+ return ret_val;
+ }
+
+ /* if data is not ptr guard the PBA must be in legacy format */
+ if (nvm_data != NVM_PBA_PTR_GUARD) {
+ *pba_num_size = E1000_PBANUM_LENGTH;
+ return E1000_SUCCESS;
+ }
+
+ ret_val = hw->nvm.ops.read(hw, pba_ptr, 1, &length);
+ if (ret_val) {
+ DEBUGOUT("NVM Read Error\n");
+ return ret_val;
+ }
+
+ if (length == 0xFFFF || length == 0) {
+ DEBUGOUT("NVM PBA number section invalid length\n");
+ return -E1000_ERR_NVM_PBA_SECTION;
+ }
+
+ /* Convert from length in u16 values to u8 chars, add 1 for NULL,
+ * and subtract 2 because length field is included in length.
+ */
+ *pba_num_size = ((u32)length * 2) - 1;
+
+ return E1000_SUCCESS;
+}
+
+
+
+
+
+/**
+ * e1000_read_mac_addr_generic - Read device MAC address
+ * @hw: pointer to the HW structure
+ *
+ * Reads the device MAC address from the EEPROM and stores the value.
+ * Since devices with two ports use the same EEPROM, we increment the
+ * last bit in the MAC address for the second port.
+ **/
+s32 e1000_read_mac_addr_generic(struct e1000_hw *hw)
+{
+ u32 rar_high;
+ u32 rar_low;
+ u16 i;
+
+ rar_high = E1000_READ_REG(hw, E1000_RAH(0));
+ rar_low = E1000_READ_REG(hw, E1000_RAL(0));
+
+ for (i = 0; i < E1000_RAL_MAC_ADDR_LEN; i++)
+ hw->mac.perm_addr[i] = (u8)(rar_low >> (i*8));
+
+ for (i = 0; i < E1000_RAH_MAC_ADDR_LEN; i++)
+ hw->mac.perm_addr[i+4] = (u8)(rar_high >> (i*8));
+
+ for (i = 0; i < ETH_ADDR_LEN; i++)
+ hw->mac.addr[i] = hw->mac.perm_addr[i];
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_validate_nvm_checksum_generic - Validate EEPROM checksum
+ * @hw: pointer to the HW structure
+ *
+ * Calculates the EEPROM checksum by reading/adding each word of the EEPROM
+ * and then verifies that the sum of the EEPROM is equal to 0xBABA.
+ **/
+s32 e1000_validate_nvm_checksum_generic(struct e1000_hw *hw)
+{
+ s32 ret_val;
+ u16 checksum = 0;
+ u16 i, nvm_data;
+
+ DEBUGFUNC("e1000_validate_nvm_checksum_generic");
+
+ for (i = 0; i < (NVM_CHECKSUM_REG + 1); i++) {
+ ret_val = hw->nvm.ops.read(hw, i, 1, &nvm_data);
+ if (ret_val) {
+ DEBUGOUT("NVM Read Error\n");
+ return ret_val;
+ }
+ checksum += nvm_data;
+ }
+
+ if (checksum != (u16) NVM_SUM) {
+ DEBUGOUT("NVM Checksum Invalid\n");
+ return -E1000_ERR_NVM;
+ }
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_update_nvm_checksum_generic - Update EEPROM checksum
+ * @hw: pointer to the HW structure
+ *
+ * Updates the EEPROM checksum by reading/adding each word of the EEPROM
+ * up to the checksum. Then calculates the EEPROM checksum and writes the
+ * value to the EEPROM.
+ **/
+s32 e1000_update_nvm_checksum_generic(struct e1000_hw *hw)
+{
+ s32 ret_val;
+ u16 checksum = 0;
+ u16 i, nvm_data;
+
+ DEBUGFUNC("e1000_update_nvm_checksum");
+
+ for (i = 0; i < NVM_CHECKSUM_REG; i++) {
+ ret_val = hw->nvm.ops.read(hw, i, 1, &nvm_data);
+ if (ret_val) {
+ DEBUGOUT("NVM Read Error while updating checksum.\n");
+ return ret_val;
+ }
+ checksum += nvm_data;
+ }
+ checksum = (u16) NVM_SUM - checksum;
+ ret_val = hw->nvm.ops.write(hw, NVM_CHECKSUM_REG, 1, &checksum);
+ if (ret_val)
+ DEBUGOUT("NVM Write Error while updating checksum.\n");
+
+ return ret_val;
+}
+
+/**
+ * e1000_reload_nvm_generic - Reloads EEPROM
+ * @hw: pointer to the HW structure
+ *
+ * Reloads the EEPROM by setting the "Reinitialize from EEPROM" bit in the
+ * extended control register.
+ **/
+static void e1000_reload_nvm_generic(struct e1000_hw *hw)
+{
+ u32 ctrl_ext;
+
+ DEBUGFUNC("e1000_reload_nvm_generic");
+
+ usec_delay(10);
+ ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
+ ctrl_ext |= E1000_CTRL_EXT_EE_RST;
+ E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext);
+ E1000_WRITE_FLUSH(hw);
+}
+
+/**
+ * e1000_get_fw_version - Get firmware version information
+ * @hw: pointer to the HW structure
+ * @fw_vers: pointer to output version structure
+ *
+ * unsupported/not present features return 0 in version structure
+ **/
+void e1000_get_fw_version(struct e1000_hw *hw, struct e1000_fw_version *fw_vers)
+{
+ u16 eeprom_verh, eeprom_verl, etrack_test, fw_version;
+ u8 q, hval, rem, result;
+ u16 comb_verh, comb_verl, comb_offset;
+
+ memset(fw_vers, 0, sizeof(struct e1000_fw_version));
+
+ /* basic eeprom version numbers, bits used vary by part and by tool
+ * used to create the nvm images */
+ /* Check which data format we have */
+ hw->nvm.ops.read(hw, NVM_ETRACK_HIWORD, 1, &etrack_test);
+ switch (hw->mac.type) {
+ case e1000_i211:
+ e1000_read_invm_version(hw, fw_vers);
+ return;
+ case e1000_82575:
+ case e1000_82576:
+ case e1000_82580:
+ /* Use this format, unless EETRACK ID exists,
+ * then use alternate format
+ */
+ if ((etrack_test & NVM_MAJOR_MASK) != NVM_ETRACK_VALID) {
+ hw->nvm.ops.read(hw, NVM_VERSION, 1, &fw_version);
+ fw_vers->eep_major = (fw_version & NVM_MAJOR_MASK)
+ >> NVM_MAJOR_SHIFT;
+ fw_vers->eep_minor = (fw_version & NVM_MINOR_MASK)
+ >> NVM_MINOR_SHIFT;
+ fw_vers->eep_build = (fw_version & NVM_IMAGE_ID_MASK);
+ goto etrack_id;
+ }
+ break;
+ case e1000_i210:
+ if (!(e1000_get_flash_presence_i210(hw))) {
+ e1000_read_invm_version(hw, fw_vers);
+ return;
+ }
+ /* fall through */
+ case e1000_i350:
+ case e1000_i354:
+ /* find combo image version */
+ hw->nvm.ops.read(hw, NVM_COMB_VER_PTR, 1, &comb_offset);
+ if ((comb_offset != 0x0) &&
+ (comb_offset != NVM_VER_INVALID)) {
+
+ hw->nvm.ops.read(hw, (NVM_COMB_VER_OFF + comb_offset
+ + 1), 1, &comb_verh);
+ hw->nvm.ops.read(hw, (NVM_COMB_VER_OFF + comb_offset),
+ 1, &comb_verl);
+
+ /* get Option Rom version if it exists and is valid */
+ if ((comb_verh && comb_verl) &&
+ ((comb_verh != NVM_VER_INVALID) &&
+ (comb_verl != NVM_VER_INVALID))) {
+
+ fw_vers->or_valid = true;
+ fw_vers->or_major =
+ comb_verl >> NVM_COMB_VER_SHFT;
+ fw_vers->or_build =
+ (comb_verl << NVM_COMB_VER_SHFT)
+ | (comb_verh >> NVM_COMB_VER_SHFT);
+ fw_vers->or_patch =
+ comb_verh & NVM_COMB_VER_MASK;
+ }
+ }
+ break;
+ default:
+ return;
+ }
+ hw->nvm.ops.read(hw, NVM_VERSION, 1, &fw_version);
+ fw_vers->eep_major = (fw_version & NVM_MAJOR_MASK)
+ >> NVM_MAJOR_SHIFT;
+
+ /* check for old style version format in newer images*/
+ if ((fw_version & NVM_NEW_DEC_MASK) == 0x0) {
+ eeprom_verl = (fw_version & NVM_COMB_VER_MASK);
+ } else {
+ eeprom_verl = (fw_version & NVM_MINOR_MASK)
+ >> NVM_MINOR_SHIFT;
+ }
+ /* Convert minor value to hex before assigning to output struct
+ * Val to be converted will not be higher than 99, per tool output
+ */
+ q = eeprom_verl / NVM_HEX_CONV;
+ hval = q * NVM_HEX_TENS;
+ rem = eeprom_verl % NVM_HEX_CONV;
+ result = hval + rem;
+ fw_vers->eep_minor = result;
+
+etrack_id:
+ if ((etrack_test & NVM_MAJOR_MASK) == NVM_ETRACK_VALID) {
+ hw->nvm.ops.read(hw, NVM_ETRACK_WORD, 1, &eeprom_verl);
+ hw->nvm.ops.read(hw, (NVM_ETRACK_WORD + 1), 1, &eeprom_verh);
+ fw_vers->etrack_id = (eeprom_verh << NVM_ETRACK_SHIFT)
+ | eeprom_verl;
+ }
+ return;
+}
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.h
new file mode 100644
index 00000000..fe62785a
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.h
@@ -0,0 +1,75 @@
+/*******************************************************************************
+
+ Intel(R) Gigabit Ethernet Linux driver
+ Copyright(c) 2007-2013 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _E1000_NVM_H_
+#define _E1000_NVM_H_
+
+
+struct e1000_fw_version {
+ u32 etrack_id;
+ u16 eep_major;
+ u16 eep_minor;
+ u16 eep_build;
+
+ u8 invm_major;
+ u8 invm_minor;
+ u8 invm_img_type;
+
+ bool or_valid;
+ u16 or_major;
+ u16 or_build;
+ u16 or_patch;
+};
+
+
+void e1000_init_nvm_ops_generic(struct e1000_hw *hw);
+s32 e1000_null_read_nvm(struct e1000_hw *hw, u16 a, u16 b, u16 *c);
+void e1000_null_nvm_generic(struct e1000_hw *hw);
+s32 e1000_null_led_default(struct e1000_hw *hw, u16 *data);
+s32 e1000_null_write_nvm(struct e1000_hw *hw, u16 a, u16 b, u16 *c);
+s32 e1000_acquire_nvm_generic(struct e1000_hw *hw);
+
+s32 e1000_poll_eerd_eewr_done(struct e1000_hw *hw, int ee_reg);
+s32 e1000_read_mac_addr_generic(struct e1000_hw *hw);
+s32 e1000_read_pba_string_generic(struct e1000_hw *hw, u8 *pba_num,
+ u32 pba_num_size);
+s32 e1000_read_pba_length_generic(struct e1000_hw *hw, u32 *pba_num_size);
+s32 e1000_read_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data);
+s32 e1000_read_nvm_eerd(struct e1000_hw *hw, u16 offset, u16 words,
+ u16 *data);
+s32 e1000_valid_led_default_generic(struct e1000_hw *hw, u16 *data);
+s32 e1000_validate_nvm_checksum_generic(struct e1000_hw *hw);
+s32 e1000_write_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words,
+ u16 *data);
+s32 e1000_update_nvm_checksum_generic(struct e1000_hw *hw);
+void e1000_release_nvm_generic(struct e1000_hw *hw);
+void e1000_get_fw_version(struct e1000_hw *hw,
+ struct e1000_fw_version *fw_vers);
+
+#define E1000_STM_OPCODE 0xDB00
+
+#endif
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_osdep.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_osdep.h
new file mode 100644
index 00000000..d1cf98e2
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_osdep.h
@@ -0,0 +1,136 @@
+/*******************************************************************************
+
+ Intel(R) Gigabit Ethernet Linux driver
+ Copyright(c) 2007-2013 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+
+/* glue for the OS independent part of e1000
+ * includes register access macros
+ */
+
+#ifndef _E1000_OSDEP_H_
+#define _E1000_OSDEP_H_
+
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/sched.h>
+#include "kcompat.h"
+
+#ifndef __INTEL_COMPILER
+#pragma GCC diagnostic ignored "-Wunused-function"
+#endif
+
+#define usec_delay(x) udelay(x)
+#define usec_delay_irq(x) udelay(x)
+#ifndef msec_delay
+#define msec_delay(x) do { \
+ /* Don't mdelay in interrupt context! */ \
+ if (in_interrupt()) \
+ BUG(); \
+ else \
+ msleep(x); \
+} while (0)
+
+/* Some workarounds require millisecond delays and are run during interrupt
+ * context. Most notably, when establishing link, the phy may need tweaking
+ * but cannot process phy register reads/writes faster than millisecond
+ * intervals...and we establish link due to a "link status change" interrupt.
+ */
+#define msec_delay_irq(x) mdelay(x)
+#endif
+
+#define PCI_COMMAND_REGISTER PCI_COMMAND
+#define CMD_MEM_WRT_INVALIDATE PCI_COMMAND_INVALIDATE
+#define ETH_ADDR_LEN ETH_ALEN
+
+#ifdef __BIG_ENDIAN
+#define E1000_BIG_ENDIAN __BIG_ENDIAN
+#endif
+
+
+#ifdef DEBUG
+#define DEBUGOUT(S) printk(KERN_DEBUG S)
+#define DEBUGOUT1(S, A...) printk(KERN_DEBUG S, ## A)
+#else
+#define DEBUGOUT(S)
+#define DEBUGOUT1(S, A...)
+#endif
+
+#ifdef DEBUG_FUNC
+#define DEBUGFUNC(F) DEBUGOUT(F "\n")
+#else
+#define DEBUGFUNC(F)
+#endif
+#define DEBUGOUT2 DEBUGOUT1
+#define DEBUGOUT3 DEBUGOUT2
+#define DEBUGOUT7 DEBUGOUT3
+
+#define E1000_REGISTER(a, reg) reg
+
+#define E1000_WRITE_REG(a, reg, value) ( \
+ writel((value), ((a)->hw_addr + E1000_REGISTER(a, reg))))
+
+#define E1000_READ_REG(a, reg) (readl((a)->hw_addr + E1000_REGISTER(a, reg)))
+
+#define E1000_WRITE_REG_ARRAY(a, reg, offset, value) ( \
+ writel((value), ((a)->hw_addr + E1000_REGISTER(a, reg) + ((offset) << 2))))
+
+#define E1000_READ_REG_ARRAY(a, reg, offset) ( \
+ readl((a)->hw_addr + E1000_REGISTER(a, reg) + ((offset) << 2)))
+
+#define E1000_READ_REG_ARRAY_DWORD E1000_READ_REG_ARRAY
+#define E1000_WRITE_REG_ARRAY_DWORD E1000_WRITE_REG_ARRAY
+
+#define E1000_WRITE_REG_ARRAY_WORD(a, reg, offset, value) ( \
+ writew((value), ((a)->hw_addr + E1000_REGISTER(a, reg) + ((offset) << 1))))
+
+#define E1000_READ_REG_ARRAY_WORD(a, reg, offset) ( \
+ readw((a)->hw_addr + E1000_REGISTER(a, reg) + ((offset) << 1)))
+
+#define E1000_WRITE_REG_ARRAY_BYTE(a, reg, offset, value) ( \
+ writeb((value), ((a)->hw_addr + E1000_REGISTER(a, reg) + (offset))))
+
+#define E1000_READ_REG_ARRAY_BYTE(a, reg, offset) ( \
+ readb((a)->hw_addr + E1000_REGISTER(a, reg) + (offset)))
+
+#define E1000_WRITE_REG_IO(a, reg, offset) do { \
+ outl(reg, ((a)->io_base)); \
+ outl(offset, ((a)->io_base + 4)); } while (0)
+
+#define E1000_WRITE_FLUSH(a) E1000_READ_REG(a, E1000_STATUS)
+
+#define E1000_WRITE_FLASH_REG(a, reg, value) ( \
+ writel((value), ((a)->flash_address + reg)))
+
+#define E1000_WRITE_FLASH_REG16(a, reg, value) ( \
+ writew((value), ((a)->flash_address + reg)))
+
+#define E1000_READ_FLASH_REG(a, reg) (readl((a)->flash_address + reg))
+
+#define E1000_READ_FLASH_REG16(a, reg) (readw((a)->flash_address + reg))
+
+#endif /* _E1000_OSDEP_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c
new file mode 100644
index 00000000..df224702
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c
@@ -0,0 +1,3405 @@
+/*******************************************************************************
+
+ Intel(R) Gigabit Ethernet Linux driver
+ Copyright(c) 2007-2013 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#include "e1000_api.h"
+
+static s32 e1000_wait_autoneg(struct e1000_hw *hw);
+/* Cable length tables */
+static const u16 e1000_m88_cable_length_table[] = {
+ 0, 50, 80, 110, 140, 140, E1000_CABLE_LENGTH_UNDEFINED };
+#define M88E1000_CABLE_LENGTH_TABLE_SIZE \
+ (sizeof(e1000_m88_cable_length_table) / \
+ sizeof(e1000_m88_cable_length_table[0]))
+
+static const u16 e1000_igp_2_cable_length_table[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 3, 5, 8, 11, 13, 16, 18, 21, 0, 0, 0, 3,
+ 6, 10, 13, 16, 19, 23, 26, 29, 32, 35, 38, 41, 6, 10, 14, 18, 22,
+ 26, 30, 33, 37, 41, 44, 48, 51, 54, 58, 61, 21, 26, 31, 35, 40,
+ 44, 49, 53, 57, 61, 65, 68, 72, 75, 79, 82, 40, 45, 51, 56, 61,
+ 66, 70, 75, 79, 83, 87, 91, 94, 98, 101, 104, 60, 66, 72, 77, 82,
+ 87, 92, 96, 100, 104, 108, 111, 114, 117, 119, 121, 83, 89, 95,
+ 100, 105, 109, 113, 116, 119, 122, 124, 104, 109, 114, 118, 121,
+ 124};
+#define IGP02E1000_CABLE_LENGTH_TABLE_SIZE \
+ (sizeof(e1000_igp_2_cable_length_table) / \
+ sizeof(e1000_igp_2_cable_length_table[0]))
+
+/**
+ * e1000_init_phy_ops_generic - Initialize PHY function pointers
+ * @hw: pointer to the HW structure
+ *
+ * Setups up the function pointers to no-op functions
+ **/
+void e1000_init_phy_ops_generic(struct e1000_hw *hw)
+{
+ struct e1000_phy_info *phy = &hw->phy;
+ DEBUGFUNC("e1000_init_phy_ops_generic");
+
+ /* Initialize function pointers */
+ phy->ops.init_params = e1000_null_ops_generic;
+ phy->ops.acquire = e1000_null_ops_generic;
+ phy->ops.check_polarity = e1000_null_ops_generic;
+ phy->ops.check_reset_block = e1000_null_ops_generic;
+ phy->ops.commit = e1000_null_ops_generic;
+ phy->ops.force_speed_duplex = e1000_null_ops_generic;
+ phy->ops.get_cfg_done = e1000_null_ops_generic;
+ phy->ops.get_cable_length = e1000_null_ops_generic;
+ phy->ops.get_info = e1000_null_ops_generic;
+ phy->ops.set_page = e1000_null_set_page;
+ phy->ops.read_reg = e1000_null_read_reg;
+ phy->ops.read_reg_locked = e1000_null_read_reg;
+ phy->ops.read_reg_page = e1000_null_read_reg;
+ phy->ops.release = e1000_null_phy_generic;
+ phy->ops.reset = e1000_null_ops_generic;
+ phy->ops.set_d0_lplu_state = e1000_null_lplu_state;
+ phy->ops.set_d3_lplu_state = e1000_null_lplu_state;
+ phy->ops.write_reg = e1000_null_write_reg;
+ phy->ops.write_reg_locked = e1000_null_write_reg;
+ phy->ops.write_reg_page = e1000_null_write_reg;
+ phy->ops.power_up = e1000_null_phy_generic;
+ phy->ops.power_down = e1000_null_phy_generic;
+ phy->ops.read_i2c_byte = e1000_read_i2c_byte_null;
+ phy->ops.write_i2c_byte = e1000_write_i2c_byte_null;
+}
+
+/**
+ * e1000_null_set_page - No-op function, return 0
+ * @hw: pointer to the HW structure
+ **/
+s32 e1000_null_set_page(struct e1000_hw E1000_UNUSEDARG *hw,
+ u16 E1000_UNUSEDARG data)
+{
+ DEBUGFUNC("e1000_null_set_page");
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_null_read_reg - No-op function, return 0
+ * @hw: pointer to the HW structure
+ **/
+s32 e1000_null_read_reg(struct e1000_hw E1000_UNUSEDARG *hw,
+ u32 E1000_UNUSEDARG offset, u16 E1000_UNUSEDARG *data)
+{
+ DEBUGFUNC("e1000_null_read_reg");
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_null_phy_generic - No-op function, return void
+ * @hw: pointer to the HW structure
+ **/
+void e1000_null_phy_generic(struct e1000_hw E1000_UNUSEDARG *hw)
+{
+ DEBUGFUNC("e1000_null_phy_generic");
+ return;
+}
+
+/**
+ * e1000_null_lplu_state - No-op function, return 0
+ * @hw: pointer to the HW structure
+ **/
+s32 e1000_null_lplu_state(struct e1000_hw E1000_UNUSEDARG *hw,
+ bool E1000_UNUSEDARG active)
+{
+ DEBUGFUNC("e1000_null_lplu_state");
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_null_write_reg - No-op function, return 0
+ * @hw: pointer to the HW structure
+ **/
+s32 e1000_null_write_reg(struct e1000_hw E1000_UNUSEDARG *hw,
+ u32 E1000_UNUSEDARG offset, u16 E1000_UNUSEDARG data)
+{
+ DEBUGFUNC("e1000_null_write_reg");
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_read_i2c_byte_null - No-op function, return 0
+ * @hw: pointer to hardware structure
+ * @byte_offset: byte offset to write
+ * @dev_addr: device address
+ * @data: data value read
+ *
+ **/
+s32 e1000_read_i2c_byte_null(struct e1000_hw E1000_UNUSEDARG *hw,
+ u8 E1000_UNUSEDARG byte_offset,
+ u8 E1000_UNUSEDARG dev_addr,
+ u8 E1000_UNUSEDARG *data)
+{
+ DEBUGFUNC("e1000_read_i2c_byte_null");
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_write_i2c_byte_null - No-op function, return 0
+ * @hw: pointer to hardware structure
+ * @byte_offset: byte offset to write
+ * @dev_addr: device address
+ * @data: data value to write
+ *
+ **/
+s32 e1000_write_i2c_byte_null(struct e1000_hw E1000_UNUSEDARG *hw,
+ u8 E1000_UNUSEDARG byte_offset,
+ u8 E1000_UNUSEDARG dev_addr,
+ u8 E1000_UNUSEDARG data)
+{
+ DEBUGFUNC("e1000_write_i2c_byte_null");
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_check_reset_block_generic - Check if PHY reset is blocked
+ * @hw: pointer to the HW structure
+ *
+ * Read the PHY management control register and check whether a PHY reset
+ * is blocked. If a reset is not blocked return E1000_SUCCESS, otherwise
+ * return E1000_BLK_PHY_RESET (12).
+ **/
+s32 e1000_check_reset_block_generic(struct e1000_hw *hw)
+{
+ u32 manc;
+
+ DEBUGFUNC("e1000_check_reset_block");
+
+ manc = E1000_READ_REG(hw, E1000_MANC);
+
+ return (manc & E1000_MANC_BLK_PHY_RST_ON_IDE) ?
+ E1000_BLK_PHY_RESET : E1000_SUCCESS;
+}
+
+/**
+ * e1000_get_phy_id - Retrieve the PHY ID and revision
+ * @hw: pointer to the HW structure
+ *
+ * Reads the PHY registers and stores the PHY ID and possibly the PHY
+ * revision in the hardware structure.
+ **/
+s32 e1000_get_phy_id(struct e1000_hw *hw)
+{
+ struct e1000_phy_info *phy = &hw->phy;
+ s32 ret_val = E1000_SUCCESS;
+ u16 phy_id;
+
+ DEBUGFUNC("e1000_get_phy_id");
+
+ if (!phy->ops.read_reg)
+ return E1000_SUCCESS;
+
+ ret_val = phy->ops.read_reg(hw, PHY_ID1, &phy_id);
+ if (ret_val)
+ return ret_val;
+
+ phy->id = (u32)(phy_id << 16);
+ usec_delay(20);
+ ret_val = phy->ops.read_reg(hw, PHY_ID2, &phy_id);
+ if (ret_val)
+ return ret_val;
+
+ phy->id |= (u32)(phy_id & PHY_REVISION_MASK);
+ phy->revision = (u32)(phy_id & ~PHY_REVISION_MASK);
+
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_phy_reset_dsp_generic - Reset PHY DSP
+ * @hw: pointer to the HW structure
+ *
+ * Reset the digital signal processor.
+ **/
+s32 e1000_phy_reset_dsp_generic(struct e1000_hw *hw)
+{
+ s32 ret_val;
+
+ DEBUGFUNC("e1000_phy_reset_dsp_generic");
+
+ if (!hw->phy.ops.write_reg)
+ return E1000_SUCCESS;
+
+ ret_val = hw->phy.ops.write_reg(hw, M88E1000_PHY_GEN_CONTROL, 0xC1);
+ if (ret_val)
+ return ret_val;
+
+ return hw->phy.ops.write_reg(hw, M88E1000_PHY_GEN_CONTROL, 0);
+}
+
+/**
+ * e1000_read_phy_reg_mdic - Read MDI control register
+ * @hw: pointer to the HW structure
+ * @offset: register offset to be read
+ * @data: pointer to the read data
+ *
+ * Reads the MDI control register in the PHY at offset and stores the
+ * information read to data.
+ **/
+s32 e1000_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data)
+{
+ struct e1000_phy_info *phy = &hw->phy;
+ u32 i, mdic = 0;
+
+ DEBUGFUNC("e1000_read_phy_reg_mdic");
+
+ if (offset > MAX_PHY_REG_ADDRESS) {
+ DEBUGOUT1("PHY Address %d is out of range\n", offset);
+ return -E1000_ERR_PARAM;
+ }
+
+ /* Set up Op-code, Phy Address, and register offset in the MDI
+ * Control register. The MAC will take care of interfacing with the
+ * PHY to retrieve the desired data.
+ */
+ mdic = ((offset << E1000_MDIC_REG_SHIFT) |
+ (phy->addr << E1000_MDIC_PHY_SHIFT) |
+ (E1000_MDIC_OP_READ));
+
+ E1000_WRITE_REG(hw, E1000_MDIC, mdic);
+
+ /* Poll the ready bit to see if the MDI read completed
+ * Increasing the time out as testing showed failures with
+ * the lower time out
+ */
+ for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) {
+ usec_delay_irq(50);
+ mdic = E1000_READ_REG(hw, E1000_MDIC);
+ if (mdic & E1000_MDIC_READY)
+ break;
+ }
+ if (!(mdic & E1000_MDIC_READY)) {
+ DEBUGOUT("MDI Read did not complete\n");
+ return -E1000_ERR_PHY;
+ }
+ if (mdic & E1000_MDIC_ERROR) {
+ DEBUGOUT("MDI Error\n");
+ return -E1000_ERR_PHY;
+ }
+ if (((mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT) != offset) {
+ DEBUGOUT2("MDI Read offset error - requested %d, returned %d\n",
+ offset,
+ (mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
+ return -E1000_ERR_PHY;
+ }
+ *data = (u16) mdic;
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_write_phy_reg_mdic - Write MDI control register
+ * @hw: pointer to the HW structure
+ * @offset: register offset to write to
+ * @data: data to write to register at offset
+ *
+ * Writes data to MDI control register in the PHY at offset.
+ **/
+s32 e1000_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data)
+{
+ struct e1000_phy_info *phy = &hw->phy;
+ u32 i, mdic = 0;
+
+ DEBUGFUNC("e1000_write_phy_reg_mdic");
+
+ if (offset > MAX_PHY_REG_ADDRESS) {
+ DEBUGOUT1("PHY Address %d is out of range\n", offset);
+ return -E1000_ERR_PARAM;
+ }
+
+ /* Set up Op-code, Phy Address, and register offset in the MDI
+ * Control register. The MAC will take care of interfacing with the
+ * PHY to retrieve the desired data.
+ */
+ mdic = (((u32)data) |
+ (offset << E1000_MDIC_REG_SHIFT) |
+ (phy->addr << E1000_MDIC_PHY_SHIFT) |
+ (E1000_MDIC_OP_WRITE));
+
+ E1000_WRITE_REG(hw, E1000_MDIC, mdic);
+
+ /* Poll the ready bit to see if the MDI read completed
+ * Increasing the time out as testing showed failures with
+ * the lower time out
+ */
+ for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) {
+ usec_delay_irq(50);
+ mdic = E1000_READ_REG(hw, E1000_MDIC);
+ if (mdic & E1000_MDIC_READY)
+ break;
+ }
+ if (!(mdic & E1000_MDIC_READY)) {
+ DEBUGOUT("MDI Write did not complete\n");
+ return -E1000_ERR_PHY;
+ }
+ if (mdic & E1000_MDIC_ERROR) {
+ DEBUGOUT("MDI Error\n");
+ return -E1000_ERR_PHY;
+ }
+ if (((mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT) != offset) {
+ DEBUGOUT2("MDI Write offset error - requested %d, returned %d\n",
+ offset,
+ (mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
+ return -E1000_ERR_PHY;
+ }
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_read_phy_reg_i2c - Read PHY register using i2c
+ * @hw: pointer to the HW structure
+ * @offset: register offset to be read
+ * @data: pointer to the read data
+ *
+ * Reads the PHY register at offset using the i2c interface and stores the
+ * retrieved information in data.
+ **/
+s32 e1000_read_phy_reg_i2c(struct e1000_hw *hw, u32 offset, u16 *data)
+{
+ struct e1000_phy_info *phy = &hw->phy;
+ u32 i, i2ccmd = 0;
+
+ DEBUGFUNC("e1000_read_phy_reg_i2c");
+
+ /* Set up Op-code, Phy Address, and register address in the I2CCMD
+ * register. The MAC will take care of interfacing with the
+ * PHY to retrieve the desired data.
+ */
+ i2ccmd = ((offset << E1000_I2CCMD_REG_ADDR_SHIFT) |
+ (phy->addr << E1000_I2CCMD_PHY_ADDR_SHIFT) |
+ (E1000_I2CCMD_OPCODE_READ));
+
+ E1000_WRITE_REG(hw, E1000_I2CCMD, i2ccmd);
+
+ /* Poll the ready bit to see if the I2C read completed */
+ for (i = 0; i < E1000_I2CCMD_PHY_TIMEOUT; i++) {
+ usec_delay(50);
+ i2ccmd = E1000_READ_REG(hw, E1000_I2CCMD);
+ if (i2ccmd & E1000_I2CCMD_READY)
+ break;
+ }
+ if (!(i2ccmd & E1000_I2CCMD_READY)) {
+ DEBUGOUT("I2CCMD Read did not complete\n");
+ return -E1000_ERR_PHY;
+ }
+ if (i2ccmd & E1000_I2CCMD_ERROR) {
+ DEBUGOUT("I2CCMD Error bit set\n");
+ return -E1000_ERR_PHY;
+ }
+
+ /* Need to byte-swap the 16-bit value. */
+ *data = ((i2ccmd >> 8) & 0x00FF) | ((i2ccmd << 8) & 0xFF00);
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_write_phy_reg_i2c - Write PHY register using i2c
+ * @hw: pointer to the HW structure
+ * @offset: register offset to write to
+ * @data: data to write at register offset
+ *
+ * Writes the data to PHY register at the offset using the i2c interface.
+ **/
+s32 e1000_write_phy_reg_i2c(struct e1000_hw *hw, u32 offset, u16 data)
+{
+ struct e1000_phy_info *phy = &hw->phy;
+ u32 i, i2ccmd = 0;
+ u16 phy_data_swapped;
+
+ DEBUGFUNC("e1000_write_phy_reg_i2c");
+
+ /* Prevent overwritting SFP I2C EEPROM which is at A0 address.*/
+ if ((hw->phy.addr == 0) || (hw->phy.addr > 7)) {
+ DEBUGOUT1("PHY I2C Address %d is out of range.\n",
+ hw->phy.addr);
+ return -E1000_ERR_CONFIG;
+ }
+
+ /* Swap the data bytes for the I2C interface */
+ phy_data_swapped = ((data >> 8) & 0x00FF) | ((data << 8) & 0xFF00);
+
+ /* Set up Op-code, Phy Address, and register address in the I2CCMD
+ * register. The MAC will take care of interfacing with the
+ * PHY to retrieve the desired data.
+ */
+ i2ccmd = ((offset << E1000_I2CCMD_REG_ADDR_SHIFT) |
+ (phy->addr << E1000_I2CCMD_PHY_ADDR_SHIFT) |
+ E1000_I2CCMD_OPCODE_WRITE |
+ phy_data_swapped);
+
+ E1000_WRITE_REG(hw, E1000_I2CCMD, i2ccmd);
+
+ /* Poll the ready bit to see if the I2C read completed */
+ for (i = 0; i < E1000_I2CCMD_PHY_TIMEOUT; i++) {
+ usec_delay(50);
+ i2ccmd = E1000_READ_REG(hw, E1000_I2CCMD);
+ if (i2ccmd & E1000_I2CCMD_READY)
+ break;
+ }
+ if (!(i2ccmd & E1000_I2CCMD_READY)) {
+ DEBUGOUT("I2CCMD Write did not complete\n");
+ return -E1000_ERR_PHY;
+ }
+ if (i2ccmd & E1000_I2CCMD_ERROR) {
+ DEBUGOUT("I2CCMD Error bit set\n");
+ return -E1000_ERR_PHY;
+ }
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_read_sfp_data_byte - Reads SFP module data.
+ * @hw: pointer to the HW structure
+ * @offset: byte location offset to be read
+ * @data: read data buffer pointer
+ *
+ * Reads one byte from SFP module data stored
+ * in SFP resided EEPROM memory or SFP diagnostic area.
+ * Function should be called with
+ * E1000_I2CCMD_SFP_DATA_ADDR(<byte offset>) for SFP module database access
+ * E1000_I2CCMD_SFP_DIAG_ADDR(<byte offset>) for SFP diagnostics parameters
+ * access
+ **/
+s32 e1000_read_sfp_data_byte(struct e1000_hw *hw, u16 offset, u8 *data)
+{
+ u32 i = 0;
+ u32 i2ccmd = 0;
+ u32 data_local = 0;
+
+ DEBUGFUNC("e1000_read_sfp_data_byte");
+
+ if (offset > E1000_I2CCMD_SFP_DIAG_ADDR(255)) {
+ DEBUGOUT("I2CCMD command address exceeds upper limit\n");
+ return -E1000_ERR_PHY;
+ }
+
+ /* Set up Op-code, EEPROM Address,in the I2CCMD
+ * register. The MAC will take care of interfacing with the
+ * EEPROM to retrieve the desired data.
+ */
+ i2ccmd = ((offset << E1000_I2CCMD_REG_ADDR_SHIFT) |
+ E1000_I2CCMD_OPCODE_READ);
+
+ E1000_WRITE_REG(hw, E1000_I2CCMD, i2ccmd);
+
+ /* Poll the ready bit to see if the I2C read completed */
+ for (i = 0; i < E1000_I2CCMD_PHY_TIMEOUT; i++) {
+ usec_delay(50);
+ data_local = E1000_READ_REG(hw, E1000_I2CCMD);
+ if (data_local & E1000_I2CCMD_READY)
+ break;
+ }
+ if (!(data_local & E1000_I2CCMD_READY)) {
+ DEBUGOUT("I2CCMD Read did not complete\n");
+ return -E1000_ERR_PHY;
+ }
+ if (data_local & E1000_I2CCMD_ERROR) {
+ DEBUGOUT("I2CCMD Error bit set\n");
+ return -E1000_ERR_PHY;
+ }
+ *data = (u8) data_local & 0xFF;
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_write_sfp_data_byte - Writes SFP module data.
+ * @hw: pointer to the HW structure
+ * @offset: byte location offset to write to
+ * @data: data to write
+ *
+ * Writes one byte to SFP module data stored
+ * in SFP resided EEPROM memory or SFP diagnostic area.
+ * Function should be called with
+ * E1000_I2CCMD_SFP_DATA_ADDR(<byte offset>) for SFP module database access
+ * E1000_I2CCMD_SFP_DIAG_ADDR(<byte offset>) for SFP diagnostics parameters
+ * access
+ **/
+s32 e1000_write_sfp_data_byte(struct e1000_hw *hw, u16 offset, u8 data)
+{
+ u32 i = 0;
+ u32 i2ccmd = 0;
+ u32 data_local = 0;
+
+ DEBUGFUNC("e1000_write_sfp_data_byte");
+
+ if (offset > E1000_I2CCMD_SFP_DIAG_ADDR(255)) {
+ DEBUGOUT("I2CCMD command address exceeds upper limit\n");
+ return -E1000_ERR_PHY;
+ }
+ /* The programming interface is 16 bits wide
+ * so we need to read the whole word first
+ * then update appropriate byte lane and write
+ * the updated word back.
+ */
+ /* Set up Op-code, EEPROM Address,in the I2CCMD
+ * register. The MAC will take care of interfacing
+ * with an EEPROM to write the data given.
+ */
+ i2ccmd = ((offset << E1000_I2CCMD_REG_ADDR_SHIFT) |
+ E1000_I2CCMD_OPCODE_READ);
+ /* Set a command to read single word */
+ E1000_WRITE_REG(hw, E1000_I2CCMD, i2ccmd);
+ for (i = 0; i < E1000_I2CCMD_PHY_TIMEOUT; i++) {
+ usec_delay(50);
+ /* Poll the ready bit to see if lastly
+ * launched I2C operation completed
+ */
+ i2ccmd = E1000_READ_REG(hw, E1000_I2CCMD);
+ if (i2ccmd & E1000_I2CCMD_READY) {
+ /* Check if this is READ or WRITE phase */
+ if ((i2ccmd & E1000_I2CCMD_OPCODE_READ) ==
+ E1000_I2CCMD_OPCODE_READ) {
+ /* Write the selected byte
+ * lane and update whole word
+ */
+ data_local = i2ccmd & 0xFF00;
+ data_local |= data;
+ i2ccmd = ((offset <<
+ E1000_I2CCMD_REG_ADDR_SHIFT) |
+ E1000_I2CCMD_OPCODE_WRITE | data_local);
+ E1000_WRITE_REG(hw, E1000_I2CCMD, i2ccmd);
+ } else {
+ break;
+ }
+ }
+ }
+ if (!(i2ccmd & E1000_I2CCMD_READY)) {
+ DEBUGOUT("I2CCMD Write did not complete\n");
+ return -E1000_ERR_PHY;
+ }
+ if (i2ccmd & E1000_I2CCMD_ERROR) {
+ DEBUGOUT("I2CCMD Error bit set\n");
+ return -E1000_ERR_PHY;
+ }
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_read_phy_reg_m88 - Read m88 PHY register
+ * @hw: pointer to the HW structure
+ * @offset: register offset to be read
+ * @data: pointer to the read data
+ *
+ * Acquires semaphore, if necessary, then reads the PHY register at offset
+ * and storing the retrieved information in data. Release any acquired
+ * semaphores before exiting.
+ **/
+s32 e1000_read_phy_reg_m88(struct e1000_hw *hw, u32 offset, u16 *data)
+{
+ s32 ret_val;
+
+ DEBUGFUNC("e1000_read_phy_reg_m88");
+
+ if (!hw->phy.ops.acquire)
+ return E1000_SUCCESS;
+
+ ret_val = hw->phy.ops.acquire(hw);
+ if (ret_val)
+ return ret_val;
+
+ ret_val = e1000_read_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset,
+ data);
+
+ hw->phy.ops.release(hw);
+
+ return ret_val;
+}
+
+/**
+ * e1000_write_phy_reg_m88 - Write m88 PHY register
+ * @hw: pointer to the HW structure
+ * @offset: register offset to write to
+ * @data: data to write at register offset
+ *
+ * Acquires semaphore, if necessary, then writes the data to PHY register
+ * at the offset. Release any acquired semaphores before exiting.
+ **/
+s32 e1000_write_phy_reg_m88(struct e1000_hw *hw, u32 offset, u16 data)
+{
+ s32 ret_val;
+
+ DEBUGFUNC("e1000_write_phy_reg_m88");
+
+ if (!hw->phy.ops.acquire)
+ return E1000_SUCCESS;
+
+ ret_val = hw->phy.ops.acquire(hw);
+ if (ret_val)
+ return ret_val;
+
+ ret_val = e1000_write_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset,
+ data);
+
+ hw->phy.ops.release(hw);
+
+ return ret_val;
+}
+
+/**
+ * e1000_set_page_igp - Set page as on IGP-like PHY(s)
+ * @hw: pointer to the HW structure
+ * @page: page to set (shifted left when necessary)
+ *
+ * Sets PHY page required for PHY register access. Assumes semaphore is
+ * already acquired. Note, this function sets phy.addr to 1 so the caller
+ * must set it appropriately (if necessary) after this function returns.
+ **/
+s32 e1000_set_page_igp(struct e1000_hw *hw, u16 page)
+{
+ DEBUGFUNC("e1000_set_page_igp");
+
+ DEBUGOUT1("Setting page 0x%x\n", page);
+
+ hw->phy.addr = 1;
+
+ return e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT, page);
+}
+
+/**
+ * __e1000_read_phy_reg_igp - Read igp PHY register
+ * @hw: pointer to the HW structure
+ * @offset: register offset to be read
+ * @data: pointer to the read data
+ * @locked: semaphore has already been acquired or not
+ *
+ * Acquires semaphore, if necessary, then reads the PHY register at offset
+ * and stores the retrieved information in data. Release any acquired
+ * semaphores before exiting.
+ **/
+static s32 __e1000_read_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 *data,
+ bool locked)
+{
+ s32 ret_val = E1000_SUCCESS;
+
+ DEBUGFUNC("__e1000_read_phy_reg_igp");
+
+ if (!locked) {
+ if (!hw->phy.ops.acquire)
+ return E1000_SUCCESS;
+
+ ret_val = hw->phy.ops.acquire(hw);
+ if (ret_val)
+ return ret_val;
+ }
+
+ if (offset > MAX_PHY_MULTI_PAGE_REG)
+ ret_val = e1000_write_phy_reg_mdic(hw,
+ IGP01E1000_PHY_PAGE_SELECT,
+ (u16)offset);
+ if (!ret_val)
+ ret_val = e1000_read_phy_reg_mdic(hw,
+ MAX_PHY_REG_ADDRESS & offset,
+ data);
+ if (!locked)
+ hw->phy.ops.release(hw);
+
+ return ret_val;
+}
+
+/**
+ * e1000_read_phy_reg_igp - Read igp PHY register
+ * @hw: pointer to the HW structure
+ * @offset: register offset to be read
+ * @data: pointer to the read data
+ *
+ * Acquires semaphore then reads the PHY register at offset and stores the
+ * retrieved information in data.
+ * Release the acquired semaphore before exiting.
+ **/
+s32 e1000_read_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 *data)
+{
+ return __e1000_read_phy_reg_igp(hw, offset, data, false);
+}
+
+/**
+ * e1000_read_phy_reg_igp_locked - Read igp PHY register
+ * @hw: pointer to the HW structure
+ * @offset: register offset to be read
+ * @data: pointer to the read data
+ *
+ * Reads the PHY register at offset and stores the retrieved information
+ * in data. Assumes semaphore already acquired.
+ **/
+s32 e1000_read_phy_reg_igp_locked(struct e1000_hw *hw, u32 offset, u16 *data)
+{
+ return __e1000_read_phy_reg_igp(hw, offset, data, true);
+}
+
+/**
+ * e1000_write_phy_reg_igp - Write igp PHY register
+ * @hw: pointer to the HW structure
+ * @offset: register offset to write to
+ * @data: data to write at register offset
+ * @locked: semaphore has already been acquired or not
+ *
+ * Acquires semaphore, if necessary, then writes the data to PHY register
+ * at the offset. Release any acquired semaphores before exiting.
+ **/
+static s32 __e1000_write_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 data,
+ bool locked)
+{
+ s32 ret_val = E1000_SUCCESS;
+
+ DEBUGFUNC("e1000_write_phy_reg_igp");
+
+ if (!locked) {
+ if (!hw->phy.ops.acquire)
+ return E1000_SUCCESS;
+
+ ret_val = hw->phy.ops.acquire(hw);
+ if (ret_val)
+ return ret_val;
+ }
+
+ if (offset > MAX_PHY_MULTI_PAGE_REG)
+ ret_val = e1000_write_phy_reg_mdic(hw,
+ IGP01E1000_PHY_PAGE_SELECT,
+ (u16)offset);
+ if (!ret_val)
+ ret_val = e1000_write_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS &
+ offset,
+ data);
+ if (!locked)
+ hw->phy.ops.release(hw);
+
+ return ret_val;
+}
+
+/**
+ * e1000_write_phy_reg_igp - Write igp PHY register
+ * @hw: pointer to the HW structure
+ * @offset: register offset to write to
+ * @data: data to write at register offset
+ *
+ * Acquires semaphore then writes the data to PHY register
+ * at the offset. Release any acquired semaphores before exiting.
+ **/
+s32 e1000_write_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 data)
+{
+ return __e1000_write_phy_reg_igp(hw, offset, data, false);
+}
+
+/**
+ * e1000_write_phy_reg_igp_locked - Write igp PHY register
+ * @hw: pointer to the HW structure
+ * @offset: register offset to write to
+ * @data: data to write at register offset
+ *
+ * Writes the data to PHY register at the offset.
+ * Assumes semaphore already acquired.
+ **/
+s32 e1000_write_phy_reg_igp_locked(struct e1000_hw *hw, u32 offset, u16 data)
+{
+ return __e1000_write_phy_reg_igp(hw, offset, data, true);
+}
+
+/**
+ * __e1000_read_kmrn_reg - Read kumeran register
+ * @hw: pointer to the HW structure
+ * @offset: register offset to be read
+ * @data: pointer to the read data
+ * @locked: semaphore has already been acquired or not
+ *
+ * Acquires semaphore, if necessary. Then reads the PHY register at offset
+ * using the kumeran interface. The information retrieved is stored in data.
+ * Release any acquired semaphores before exiting.
+ **/
+static s32 __e1000_read_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 *data,
+ bool locked)
+{
+ u32 kmrnctrlsta;
+
+ DEBUGFUNC("__e1000_read_kmrn_reg");
+
+ if (!locked) {
+ s32 ret_val = E1000_SUCCESS;
+
+ if (!hw->phy.ops.acquire)
+ return E1000_SUCCESS;
+
+ ret_val = hw->phy.ops.acquire(hw);
+ if (ret_val)
+ return ret_val;
+ }
+
+ kmrnctrlsta = ((offset << E1000_KMRNCTRLSTA_OFFSET_SHIFT) &
+ E1000_KMRNCTRLSTA_OFFSET) | E1000_KMRNCTRLSTA_REN;
+ E1000_WRITE_REG(hw, E1000_KMRNCTRLSTA, kmrnctrlsta);
+ E1000_WRITE_FLUSH(hw);
+
+ usec_delay(2);
+
+ kmrnctrlsta = E1000_READ_REG(hw, E1000_KMRNCTRLSTA);
+ *data = (u16)kmrnctrlsta;
+
+ if (!locked)
+ hw->phy.ops.release(hw);
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_read_kmrn_reg_generic - Read kumeran register
+ * @hw: pointer to the HW structure
+ * @offset: register offset to be read
+ * @data: pointer to the read data
+ *
+ * Acquires semaphore then reads the PHY register at offset using the
+ * kumeran interface. The information retrieved is stored in data.
+ * Release the acquired semaphore before exiting.
+ **/
+s32 e1000_read_kmrn_reg_generic(struct e1000_hw *hw, u32 offset, u16 *data)
+{
+ return __e1000_read_kmrn_reg(hw, offset, data, false);
+}
+
+/**
+ * e1000_read_kmrn_reg_locked - Read kumeran register
+ * @hw: pointer to the HW structure
+ * @offset: register offset to be read
+ * @data: pointer to the read data
+ *
+ * Reads the PHY register at offset using the kumeran interface. The
+ * information retrieved is stored in data.
+ * Assumes semaphore already acquired.
+ **/
+s32 e1000_read_kmrn_reg_locked(struct e1000_hw *hw, u32 offset, u16 *data)
+{
+ return __e1000_read_kmrn_reg(hw, offset, data, true);
+}
+
+/**
+ * __e1000_write_kmrn_reg - Write kumeran register
+ * @hw: pointer to the HW structure
+ * @offset: register offset to write to
+ * @data: data to write at register offset
+ * @locked: semaphore has already been acquired or not
+ *
+ * Acquires semaphore, if necessary. Then write the data to PHY register
+ * at the offset using the kumeran interface. Release any acquired semaphores
+ * before exiting.
+ **/
+static s32 __e1000_write_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 data,
+ bool locked)
+{
+ u32 kmrnctrlsta;
+
+ DEBUGFUNC("e1000_write_kmrn_reg_generic");
+
+ if (!locked) {
+ s32 ret_val = E1000_SUCCESS;
+
+ if (!hw->phy.ops.acquire)
+ return E1000_SUCCESS;
+
+ ret_val = hw->phy.ops.acquire(hw);
+ if (ret_val)
+ return ret_val;
+ }
+
+ kmrnctrlsta = ((offset << E1000_KMRNCTRLSTA_OFFSET_SHIFT) &
+ E1000_KMRNCTRLSTA_OFFSET) | data;
+ E1000_WRITE_REG(hw, E1000_KMRNCTRLSTA, kmrnctrlsta);
+ E1000_WRITE_FLUSH(hw);
+
+ usec_delay(2);
+
+ if (!locked)
+ hw->phy.ops.release(hw);
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_write_kmrn_reg_generic - Write kumeran register
+ * @hw: pointer to the HW structure
+ * @offset: register offset to write to
+ * @data: data to write at register offset
+ *
+ * Acquires semaphore then writes the data to the PHY register at the offset
+ * using the kumeran interface. Release the acquired semaphore before exiting.
+ **/
+s32 e1000_write_kmrn_reg_generic(struct e1000_hw *hw, u32 offset, u16 data)
+{
+ return __e1000_write_kmrn_reg(hw, offset, data, false);
+}
+
+/**
+ * e1000_write_kmrn_reg_locked - Write kumeran register
+ * @hw: pointer to the HW structure
+ * @offset: register offset to write to
+ * @data: data to write at register offset
+ *
+ * Write the data to PHY register at the offset using the kumeran interface.
+ * Assumes semaphore already acquired.
+ **/
+s32 e1000_write_kmrn_reg_locked(struct e1000_hw *hw, u32 offset, u16 data)
+{
+ return __e1000_write_kmrn_reg(hw, offset, data, true);
+}
+
+/**
+ * e1000_set_master_slave_mode - Setup PHY for Master/slave mode
+ * @hw: pointer to the HW structure
+ *
+ * Sets up Master/slave mode
+ **/
+static s32 e1000_set_master_slave_mode(struct e1000_hw *hw)
+{
+ s32 ret_val;
+ u16 phy_data;
+
+ /* Resolve Master/Slave mode */
+ ret_val = hw->phy.ops.read_reg(hw, PHY_1000T_CTRL, &phy_data);
+ if (ret_val)
+ return ret_val;
+
+ /* load defaults for future use */
+ hw->phy.original_ms_type = (phy_data & CR_1000T_MS_ENABLE) ?
+ ((phy_data & CR_1000T_MS_VALUE) ?
+ e1000_ms_force_master :
+ e1000_ms_force_slave) : e1000_ms_auto;
+
+ switch (hw->phy.ms_type) {
+ case e1000_ms_force_master:
+ phy_data |= (CR_1000T_MS_ENABLE | CR_1000T_MS_VALUE);
+ break;
+ case e1000_ms_force_slave:
+ phy_data |= CR_1000T_MS_ENABLE;
+ phy_data &= ~(CR_1000T_MS_VALUE);
+ break;
+ case e1000_ms_auto:
+ phy_data &= ~CR_1000T_MS_ENABLE;
+ /* fall-through */
+ default:
+ break;
+ }
+
+ return hw->phy.ops.write_reg(hw, PHY_1000T_CTRL, phy_data);
+}
+
+/**
+ * e1000_copper_link_setup_82577 - Setup 82577 PHY for copper link
+ * @hw: pointer to the HW structure
+ *
+ * Sets up Carrier-sense on Transmit and downshift values.
+ **/
+s32 e1000_copper_link_setup_82577(struct e1000_hw *hw)
+{
+ s32 ret_val;
+ u16 phy_data;
+
+ DEBUGFUNC("e1000_copper_link_setup_82577");
+
+ if (hw->phy.reset_disable)
+ return E1000_SUCCESS;
+
+ if (hw->phy.type == e1000_phy_82580) {
+ ret_val = hw->phy.ops.reset(hw);
+ if (ret_val) {
+ DEBUGOUT("Error resetting the PHY.\n");
+ return ret_val;
+ }
+ }
+
+ /* Enable CRS on Tx. This must be set for half-duplex operation. */
+ ret_val = hw->phy.ops.read_reg(hw, I82577_CFG_REG, &phy_data);
+ if (ret_val)
+ return ret_val;
+
+ phy_data |= I82577_CFG_ASSERT_CRS_ON_TX;
+
+ /* Enable downshift */
+ phy_data |= I82577_CFG_ENABLE_DOWNSHIFT;
+
+ ret_val = hw->phy.ops.write_reg(hw, I82577_CFG_REG, phy_data);
+ if (ret_val)
+ return ret_val;
+
+ /* Set MDI/MDIX mode */
+ ret_val = hw->phy.ops.read_reg(hw, I82577_PHY_CTRL_2, &phy_data);
+ if (ret_val)
+ return ret_val;
+ phy_data &= ~I82577_PHY_CTRL2_MDIX_CFG_MASK;
+ /* Options:
+ * 0 - Auto (default)
+ * 1 - MDI mode
+ * 2 - MDI-X mode
+ */
+ switch (hw->phy.mdix) {
+ case 1:
+ break;
+ case 2:
+ phy_data |= I82577_PHY_CTRL2_MANUAL_MDIX;
+ break;
+ case 0:
+ default:
+ phy_data |= I82577_PHY_CTRL2_AUTO_MDI_MDIX;
+ break;
+ }
+ ret_val = hw->phy.ops.write_reg(hw, I82577_PHY_CTRL_2, phy_data);
+ if (ret_val)
+ return ret_val;
+
+ return e1000_set_master_slave_mode(hw);
+}
+
+/**
+ * e1000_copper_link_setup_m88 - Setup m88 PHY's for copper link
+ * @hw: pointer to the HW structure
+ *
+ * Sets up MDI/MDI-X and polarity for m88 PHY's. If necessary, transmit clock
+ * and downshift values are set also.
+ **/
+s32 e1000_copper_link_setup_m88(struct e1000_hw *hw)
+{
+ struct e1000_phy_info *phy = &hw->phy;
+ s32 ret_val;
+ u16 phy_data;
+
+ DEBUGFUNC("e1000_copper_link_setup_m88");
+
+ if (phy->reset_disable)
+ return E1000_SUCCESS;
+
+ /* Enable CRS on Tx. This must be set for half-duplex operation. */
+ ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data);
+ if (ret_val)
+ return ret_val;
+
+ phy_data |= M88E1000_PSCR_ASSERT_CRS_ON_TX;
+
+ /* Options:
+ * MDI/MDI-X = 0 (default)
+ * 0 - Auto for all speeds
+ * 1 - MDI mode
+ * 2 - MDI-X mode
+ * 3 - Auto for 1000Base-T only (MDI-X for 10/100Base-T modes)
+ */
+ phy_data &= ~M88E1000_PSCR_AUTO_X_MODE;
+
+ switch (phy->mdix) {
+ case 1:
+ phy_data |= M88E1000_PSCR_MDI_MANUAL_MODE;
+ break;
+ case 2:
+ phy_data |= M88E1000_PSCR_MDIX_MANUAL_MODE;
+ break;
+ case 3:
+ phy_data |= M88E1000_PSCR_AUTO_X_1000T;
+ break;
+ case 0:
+ default:
+ phy_data |= M88E1000_PSCR_AUTO_X_MODE;
+ break;
+ }
+
+ /* Options:
+ * disable_polarity_correction = 0 (default)
+ * Automatic Correction for Reversed Cable Polarity
+ * 0 - Disabled
+ * 1 - Enabled
+ */
+ phy_data &= ~M88E1000_PSCR_POLARITY_REVERSAL;
+ if (phy->disable_polarity_correction)
+ phy_data |= M88E1000_PSCR_POLARITY_REVERSAL;
+
+ ret_val = phy->ops.write_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data);
+ if (ret_val)
+ return ret_val;
+
+ if (phy->revision < E1000_REVISION_4) {
+ /* Force TX_CLK in the Extended PHY Specific Control Register
+ * to 25MHz clock.
+ */
+ ret_val = phy->ops.read_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL,
+ &phy_data);
+ if (ret_val)
+ return ret_val;
+
+ phy_data |= M88E1000_EPSCR_TX_CLK_25;
+
+ if ((phy->revision == E1000_REVISION_2) &&
+ (phy->id == M88E1111_I_PHY_ID)) {
+ /* 82573L PHY - set the downshift counter to 5x. */
+ phy_data &= ~M88EC018_EPSCR_DOWNSHIFT_COUNTER_MASK;
+ phy_data |= M88EC018_EPSCR_DOWNSHIFT_COUNTER_5X;
+ } else {
+ /* Configure Master and Slave downshift values */
+ phy_data &= ~(M88E1000_EPSCR_MASTER_DOWNSHIFT_MASK |
+ M88E1000_EPSCR_SLAVE_DOWNSHIFT_MASK);
+ phy_data |= (M88E1000_EPSCR_MASTER_DOWNSHIFT_1X |
+ M88E1000_EPSCR_SLAVE_DOWNSHIFT_1X);
+ }
+ ret_val = phy->ops.write_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL,
+ phy_data);
+ if (ret_val)
+ return ret_val;
+ }
+
+ /* Commit the changes. */
+ ret_val = phy->ops.commit(hw);
+ if (ret_val) {
+ DEBUGOUT("Error committing the PHY changes\n");
+ return ret_val;
+ }
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_copper_link_setup_m88_gen2 - Setup m88 PHY's for copper link
+ * @hw: pointer to the HW structure
+ *
+ * Sets up MDI/MDI-X and polarity for i347-AT4, m88e1322 and m88e1112 PHY's.
+ * Also enables and sets the downshift parameters.
+ **/
+s32 e1000_copper_link_setup_m88_gen2(struct e1000_hw *hw)
+{
+ struct e1000_phy_info *phy = &hw->phy;
+ s32 ret_val;
+ u16 phy_data;
+
+ DEBUGFUNC("e1000_copper_link_setup_m88_gen2");
+
+ if (phy->reset_disable)
+ return E1000_SUCCESS;
+
+ /* Enable CRS on Tx. This must be set for half-duplex operation. */
+ ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data);
+ if (ret_val)
+ return ret_val;
+
+ /* Options:
+ * MDI/MDI-X = 0 (default)
+ * 0 - Auto for all speeds
+ * 1 - MDI mode
+ * 2 - MDI-X mode
+ * 3 - Auto for 1000Base-T only (MDI-X for 10/100Base-T modes)
+ */
+ phy_data &= ~M88E1000_PSCR_AUTO_X_MODE;
+
+ switch (phy->mdix) {
+ case 1:
+ phy_data |= M88E1000_PSCR_MDI_MANUAL_MODE;
+ break;
+ case 2:
+ phy_data |= M88E1000_PSCR_MDIX_MANUAL_MODE;
+ break;
+ case 3:
+ /* M88E1112 does not support this mode) */
+ if (phy->id != M88E1112_E_PHY_ID) {
+ phy_data |= M88E1000_PSCR_AUTO_X_1000T;
+ break;
+ }
+ case 0:
+ default:
+ phy_data |= M88E1000_PSCR_AUTO_X_MODE;
+ break;
+ }
+
+ /* Options:
+ * disable_polarity_correction = 0 (default)
+ * Automatic Correction for Reversed Cable Polarity
+ * 0 - Disabled
+ * 1 - Enabled
+ */
+ phy_data &= ~M88E1000_PSCR_POLARITY_REVERSAL;
+ if (phy->disable_polarity_correction)
+ phy_data |= M88E1000_PSCR_POLARITY_REVERSAL;
+
+ /* Enable downshift and setting it to X6 */
+ if (phy->id == M88E1543_E_PHY_ID) {
+ phy_data &= ~I347AT4_PSCR_DOWNSHIFT_ENABLE;
+ ret_val =
+ phy->ops.write_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data);
+ if (ret_val)
+ return ret_val;
+
+ ret_val = phy->ops.commit(hw);
+ if (ret_val) {
+ DEBUGOUT("Error committing the PHY changes\n");
+ return ret_val;
+ }
+ }
+
+ phy_data &= ~I347AT4_PSCR_DOWNSHIFT_MASK;
+ phy_data |= I347AT4_PSCR_DOWNSHIFT_6X;
+ phy_data |= I347AT4_PSCR_DOWNSHIFT_ENABLE;
+
+ ret_val = phy->ops.write_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data);
+ if (ret_val)
+ return ret_val;
+
+ /* Commit the changes. */
+ ret_val = phy->ops.commit(hw);
+ if (ret_val) {
+ DEBUGOUT("Error committing the PHY changes\n");
+ return ret_val;
+ }
+
+ ret_val = e1000_set_master_slave_mode(hw);
+ if (ret_val)
+ return ret_val;
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_copper_link_setup_igp - Setup igp PHY's for copper link
+ * @hw: pointer to the HW structure
+ *
+ * Sets up LPLU, MDI/MDI-X, polarity, Smartspeed and Master/Slave config for
+ * igp PHY's.
+ **/
+s32 e1000_copper_link_setup_igp(struct e1000_hw *hw)
+{
+ struct e1000_phy_info *phy = &hw->phy;
+ s32 ret_val;
+ u16 data;
+
+ DEBUGFUNC("e1000_copper_link_setup_igp");
+
+ if (phy->reset_disable)
+ return E1000_SUCCESS;
+
+ ret_val = hw->phy.ops.reset(hw);
+ if (ret_val) {
+ DEBUGOUT("Error resetting the PHY.\n");
+ return ret_val;
+ }
+
+ /* Wait 100ms for MAC to configure PHY from NVM settings, to avoid
+ * timeout issues when LFS is enabled.
+ */
+ msec_delay(100);
+
+ /* disable lplu d0 during driver init */
+ if (hw->phy.ops.set_d0_lplu_state) {
+ ret_val = hw->phy.ops.set_d0_lplu_state(hw, false);
+ if (ret_val) {
+ DEBUGOUT("Error Disabling LPLU D0\n");
+ return ret_val;
+ }
+ }
+ /* Configure mdi-mdix settings */
+ ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CTRL, &data);
+ if (ret_val)
+ return ret_val;
+
+ data &= ~IGP01E1000_PSCR_AUTO_MDIX;
+
+ switch (phy->mdix) {
+ case 1:
+ data &= ~IGP01E1000_PSCR_FORCE_MDI_MDIX;
+ break;
+ case 2:
+ data |= IGP01E1000_PSCR_FORCE_MDI_MDIX;
+ break;
+ case 0:
+ default:
+ data |= IGP01E1000_PSCR_AUTO_MDIX;
+ break;
+ }
+ ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CTRL, data);
+ if (ret_val)
+ return ret_val;
+
+ /* set auto-master slave resolution settings */
+ if (hw->mac.autoneg) {
+ /* when autonegotiation advertisement is only 1000Mbps then we
+ * should disable SmartSpeed and enable Auto MasterSlave
+ * resolution as hardware default.
+ */
+ if (phy->autoneg_advertised == ADVERTISE_1000_FULL) {
+ /* Disable SmartSpeed */
+ ret_val = phy->ops.read_reg(hw,
+ IGP01E1000_PHY_PORT_CONFIG,
+ &data);
+ if (ret_val)
+ return ret_val;
+
+ data &= ~IGP01E1000_PSCFR_SMART_SPEED;
+ ret_val = phy->ops.write_reg(hw,
+ IGP01E1000_PHY_PORT_CONFIG,
+ data);
+ if (ret_val)
+ return ret_val;
+
+ /* Set auto Master/Slave resolution process */
+ ret_val = phy->ops.read_reg(hw, PHY_1000T_CTRL, &data);
+ if (ret_val)
+ return ret_val;
+
+ data &= ~CR_1000T_MS_ENABLE;
+ ret_val = phy->ops.write_reg(hw, PHY_1000T_CTRL, data);
+ if (ret_val)
+ return ret_val;
+ }
+
+ ret_val = e1000_set_master_slave_mode(hw);
+ }
+
+ return ret_val;
+}
+
+/**
+ * e1000_phy_setup_autoneg - Configure PHY for auto-negotiation
+ * @hw: pointer to the HW structure
+ *
+ * Reads the MII auto-neg advertisement register and/or the 1000T control
+ * register and if the PHY is already setup for auto-negotiation, then
+ * return successful. Otherwise, setup advertisement and flow control to
+ * the appropriate values for the wanted auto-negotiation.
+ **/
+static s32 e1000_phy_setup_autoneg(struct e1000_hw *hw)
+{
+ struct e1000_phy_info *phy = &hw->phy;
+ s32 ret_val;
+ u16 mii_autoneg_adv_reg;
+ u16 mii_1000t_ctrl_reg = 0;
+
+ DEBUGFUNC("e1000_phy_setup_autoneg");
+
+ phy->autoneg_advertised &= phy->autoneg_mask;
+
+ /* Read the MII Auto-Neg Advertisement Register (Address 4). */
+ ret_val = phy->ops.read_reg(hw, PHY_AUTONEG_ADV, &mii_autoneg_adv_reg);
+ if (ret_val)
+ return ret_val;
+
+ if (phy->autoneg_mask & ADVERTISE_1000_FULL) {
+ /* Read the MII 1000Base-T Control Register (Address 9). */
+ ret_val = phy->ops.read_reg(hw, PHY_1000T_CTRL,
+ &mii_1000t_ctrl_reg);
+ if (ret_val)
+ return ret_val;
+ }
+
+ /* Need to parse both autoneg_advertised and fc and set up
+ * the appropriate PHY registers. First we will parse for
+ * autoneg_advertised software override. Since we can advertise
+ * a plethora of combinations, we need to check each bit
+ * individually.
+ */
+
+ /* First we clear all the 10/100 mb speed bits in the Auto-Neg
+ * Advertisement Register (Address 4) and the 1000 mb speed bits in
+ * the 1000Base-T Control Register (Address 9).
+ */
+ mii_autoneg_adv_reg &= ~(NWAY_AR_100TX_FD_CAPS |
+ NWAY_AR_100TX_HD_CAPS |
+ NWAY_AR_10T_FD_CAPS |
+ NWAY_AR_10T_HD_CAPS);
+ mii_1000t_ctrl_reg &= ~(CR_1000T_HD_CAPS | CR_1000T_FD_CAPS);
+
+ DEBUGOUT1("autoneg_advertised %x\n", phy->autoneg_advertised);
+
+ /* Do we want to advertise 10 Mb Half Duplex? */
+ if (phy->autoneg_advertised & ADVERTISE_10_HALF) {
+ DEBUGOUT("Advertise 10mb Half duplex\n");
+ mii_autoneg_adv_reg |= NWAY_AR_10T_HD_CAPS;
+ }
+
+ /* Do we want to advertise 10 Mb Full Duplex? */
+ if (phy->autoneg_advertised & ADVERTISE_10_FULL) {
+ DEBUGOUT("Advertise 10mb Full duplex\n");
+ mii_autoneg_adv_reg |= NWAY_AR_10T_FD_CAPS;
+ }
+
+ /* Do we want to advertise 100 Mb Half Duplex? */
+ if (phy->autoneg_advertised & ADVERTISE_100_HALF) {
+ DEBUGOUT("Advertise 100mb Half duplex\n");
+ mii_autoneg_adv_reg |= NWAY_AR_100TX_HD_CAPS;
+ }
+
+ /* Do we want to advertise 100 Mb Full Duplex? */
+ if (phy->autoneg_advertised & ADVERTISE_100_FULL) {
+ DEBUGOUT("Advertise 100mb Full duplex\n");
+ mii_autoneg_adv_reg |= NWAY_AR_100TX_FD_CAPS;
+ }
+
+ /* We do not allow the Phy to advertise 1000 Mb Half Duplex */
+ if (phy->autoneg_advertised & ADVERTISE_1000_HALF)
+ DEBUGOUT("Advertise 1000mb Half duplex request denied!\n");
+
+ /* Do we want to advertise 1000 Mb Full Duplex? */
+ if (phy->autoneg_advertised & ADVERTISE_1000_FULL) {
+ DEBUGOUT("Advertise 1000mb Full duplex\n");
+ mii_1000t_ctrl_reg |= CR_1000T_FD_CAPS;
+ }
+
+ /* Check for a software override of the flow control settings, and
+ * setup the PHY advertisement registers accordingly. If
+ * auto-negotiation is enabled, then software will have to set the
+ * "PAUSE" bits to the correct value in the Auto-Negotiation
+ * Advertisement Register (PHY_AUTONEG_ADV) and re-start auto-
+ * negotiation.
+ *
+ * The possible values of the "fc" parameter are:
+ * 0: Flow control is completely disabled
+ * 1: Rx flow control is enabled (we can receive pause frames
+ * but not send pause frames).
+ * 2: Tx flow control is enabled (we can send pause frames
+ * but we do not support receiving pause frames).
+ * 3: Both Rx and Tx flow control (symmetric) are enabled.
+ * other: No software override. The flow control configuration
+ * in the EEPROM is used.
+ */
+ switch (hw->fc.current_mode) {
+ case e1000_fc_none:
+ /* Flow control (Rx & Tx) is completely disabled by a
+ * software over-ride.
+ */
+ mii_autoneg_adv_reg &= ~(NWAY_AR_ASM_DIR | NWAY_AR_PAUSE);
+ break;
+ case e1000_fc_rx_pause:
+ /* Rx Flow control is enabled, and Tx Flow control is
+ * disabled, by a software over-ride.
+ *
+ * Since there really isn't a way to advertise that we are
+ * capable of Rx Pause ONLY, we will advertise that we
+ * support both symmetric and asymmetric Rx PAUSE. Later
+ * (in e1000_config_fc_after_link_up) we will disable the
+ * hw's ability to send PAUSE frames.
+ */
+ mii_autoneg_adv_reg |= (NWAY_AR_ASM_DIR | NWAY_AR_PAUSE);
+ break;
+ case e1000_fc_tx_pause:
+ /* Tx Flow control is enabled, and Rx Flow control is
+ * disabled, by a software over-ride.
+ */
+ mii_autoneg_adv_reg |= NWAY_AR_ASM_DIR;
+ mii_autoneg_adv_reg &= ~NWAY_AR_PAUSE;
+ break;
+ case e1000_fc_full:
+ /* Flow control (both Rx and Tx) is enabled by a software
+ * over-ride.
+ */
+ mii_autoneg_adv_reg |= (NWAY_AR_ASM_DIR | NWAY_AR_PAUSE);
+ break;
+ default:
+ DEBUGOUT("Flow control param set incorrectly\n");
+ return -E1000_ERR_CONFIG;
+ }
+
+ ret_val = phy->ops.write_reg(hw, PHY_AUTONEG_ADV, mii_autoneg_adv_reg);
+ if (ret_val)
+ return ret_val;
+
+ DEBUGOUT1("Auto-Neg Advertising %x\n", mii_autoneg_adv_reg);
+
+ if (phy->autoneg_mask & ADVERTISE_1000_FULL)
+ ret_val = phy->ops.write_reg(hw, PHY_1000T_CTRL,
+ mii_1000t_ctrl_reg);
+
+ return ret_val;
+}
+
+/**
+ * e1000_copper_link_autoneg - Setup/Enable autoneg for copper link
+ * @hw: pointer to the HW structure
+ *
+ * Performs initial bounds checking on autoneg advertisement parameter, then
+ * configure to advertise the full capability. Setup the PHY to autoneg
+ * and restart the negotiation process between the link partner. If
+ * autoneg_wait_to_complete, then wait for autoneg to complete before exiting.
+ **/
+static s32 e1000_copper_link_autoneg(struct e1000_hw *hw)
+{
+ struct e1000_phy_info *phy = &hw->phy;
+ s32 ret_val;
+ u16 phy_ctrl;
+
+ DEBUGFUNC("e1000_copper_link_autoneg");
+
+ /* Perform some bounds checking on the autoneg advertisement
+ * parameter.
+ */
+ phy->autoneg_advertised &= phy->autoneg_mask;
+
+ /* If autoneg_advertised is zero, we assume it was not defaulted
+ * by the calling code so we set to advertise full capability.
+ */
+ if (!phy->autoneg_advertised)
+ phy->autoneg_advertised = phy->autoneg_mask;
+
+ DEBUGOUT("Reconfiguring auto-neg advertisement params\n");
+ ret_val = e1000_phy_setup_autoneg(hw);
+ if (ret_val) {
+ DEBUGOUT("Error Setting up Auto-Negotiation\n");
+ return ret_val;
+ }
+ DEBUGOUT("Restarting Auto-Neg\n");
+
+ /* Restart auto-negotiation by setting the Auto Neg Enable bit and
+ * the Auto Neg Restart bit in the PHY control register.
+ */
+ ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &phy_ctrl);
+ if (ret_val)
+ return ret_val;
+
+ phy_ctrl |= (MII_CR_AUTO_NEG_EN | MII_CR_RESTART_AUTO_NEG);
+ ret_val = phy->ops.write_reg(hw, PHY_CONTROL, phy_ctrl);
+ if (ret_val)
+ return ret_val;
+
+ /* Does the user want to wait for Auto-Neg to complete here, or
+ * check at a later time (for example, callback routine).
+ */
+ if (phy->autoneg_wait_to_complete) {
+ ret_val = e1000_wait_autoneg(hw);
+ if (ret_val) {
+ DEBUGOUT("Error while waiting for autoneg to complete\n");
+ return ret_val;
+ }
+ }
+
+ hw->mac.get_link_status = true;
+
+ return ret_val;
+}
+
+/**
+ * e1000_setup_copper_link_generic - Configure copper link settings
+ * @hw: pointer to the HW structure
+ *
+ * Calls the appropriate function to configure the link for auto-neg or forced
+ * speed and duplex. Then we check for link, once link is established calls
+ * to configure collision distance and flow control are called. If link is
+ * not established, we return -E1000_ERR_PHY (-2).
+ **/
+s32 e1000_setup_copper_link_generic(struct e1000_hw *hw)
+{
+ s32 ret_val;
+ bool link;
+
+ DEBUGFUNC("e1000_setup_copper_link_generic");
+
+ if (hw->mac.autoneg) {
+ /* Setup autoneg and flow control advertisement and perform
+ * autonegotiation.
+ */
+ ret_val = e1000_copper_link_autoneg(hw);
+ if (ret_val)
+ return ret_val;
+ } else {
+ /* PHY will be set to 10H, 10F, 100H or 100F
+ * depending on user settings.
+ */
+ DEBUGOUT("Forcing Speed and Duplex\n");
+ ret_val = hw->phy.ops.force_speed_duplex(hw);
+ if (ret_val) {
+ DEBUGOUT("Error Forcing Speed and Duplex\n");
+ return ret_val;
+ }
+ }
+
+ /* Check link status. Wait up to 100 microseconds for link to become
+ * valid.
+ */
+ ret_val = e1000_phy_has_link_generic(hw, COPPER_LINK_UP_LIMIT, 10,
+ &link);
+ if (ret_val)
+ return ret_val;
+
+ if (link) {
+ DEBUGOUT("Valid link established!!!\n");
+ hw->mac.ops.config_collision_dist(hw);
+ ret_val = e1000_config_fc_after_link_up_generic(hw);
+ } else {
+ DEBUGOUT("Unable to establish link!!!\n");
+ }
+
+ return ret_val;
+}
+
+/**
+ * e1000_phy_force_speed_duplex_igp - Force speed/duplex for igp PHY
+ * @hw: pointer to the HW structure
+ *
+ * Calls the PHY setup function to force speed and duplex. Clears the
+ * auto-crossover to force MDI manually. Waits for link and returns
+ * successful if link up is successful, else -E1000_ERR_PHY (-2).
+ **/
+s32 e1000_phy_force_speed_duplex_igp(struct e1000_hw *hw)
+{
+ struct e1000_phy_info *phy = &hw->phy;
+ s32 ret_val;
+ u16 phy_data;
+ bool link;
+
+ DEBUGFUNC("e1000_phy_force_speed_duplex_igp");
+
+ ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &phy_data);
+ if (ret_val)
+ return ret_val;
+
+ e1000_phy_force_speed_duplex_setup(hw, &phy_data);
+
+ ret_val = phy->ops.write_reg(hw, PHY_CONTROL, phy_data);
+ if (ret_val)
+ return ret_val;
+
+ /* Clear Auto-Crossover to force MDI manually. IGP requires MDI
+ * forced whenever speed and duplex are forced.
+ */
+ ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CTRL, &phy_data);
+ if (ret_val)
+ return ret_val;
+
+ phy_data &= ~IGP01E1000_PSCR_AUTO_MDIX;
+ phy_data &= ~IGP01E1000_PSCR_FORCE_MDI_MDIX;
+
+ ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CTRL, phy_data);
+ if (ret_val)
+ return ret_val;
+
+ DEBUGOUT1("IGP PSCR: %X\n", phy_data);
+
+ usec_delay(1);
+
+ if (phy->autoneg_wait_to_complete) {
+ DEBUGOUT("Waiting for forced speed/duplex link on IGP phy.\n");
+
+ ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT,
+ 100000, &link);
+ if (ret_val)
+ return ret_val;
+
+ if (!link)
+ DEBUGOUT("Link taking longer than expected.\n");
+
+ /* Try once more */
+ ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT,
+ 100000, &link);
+ }
+
+ return ret_val;
+}
+
+/**
+ * e1000_phy_force_speed_duplex_m88 - Force speed/duplex for m88 PHY
+ * @hw: pointer to the HW structure
+ *
+ * Calls the PHY setup function to force speed and duplex. Clears the
+ * auto-crossover to force MDI manually. Resets the PHY to commit the
+ * changes. If time expires while waiting for link up, we reset the DSP.
+ * After reset, TX_CLK and CRS on Tx must be set. Return successful upon
+ * successful completion, else return corresponding error code.
+ **/
+s32 e1000_phy_force_speed_duplex_m88(struct e1000_hw *hw)
+{
+ struct e1000_phy_info *phy = &hw->phy;
+ s32 ret_val;
+ u16 phy_data;
+ bool link;
+
+ DEBUGFUNC("e1000_phy_force_speed_duplex_m88");
+
+ /* I210 and I211 devices support Auto-Crossover in forced operation. */
+ if (phy->type != e1000_phy_i210) {
+ /* Clear Auto-Crossover to force MDI manually. M88E1000
+ * requires MDI forced whenever speed and duplex are forced.
+ */
+ ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL,
+ &phy_data);
+ if (ret_val)
+ return ret_val;
+
+ phy_data &= ~M88E1000_PSCR_AUTO_X_MODE;
+ ret_val = phy->ops.write_reg(hw, M88E1000_PHY_SPEC_CTRL,
+ phy_data);
+ if (ret_val)
+ return ret_val;
+ }
+
+ DEBUGOUT1("M88E1000 PSCR: %X\n", phy_data);
+
+ ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &phy_data);
+ if (ret_val)
+ return ret_val;
+
+ e1000_phy_force_speed_duplex_setup(hw, &phy_data);
+
+ ret_val = phy->ops.write_reg(hw, PHY_CONTROL, phy_data);
+ if (ret_val)
+ return ret_val;
+
+ /* Reset the phy to commit changes. */
+ ret_val = hw->phy.ops.commit(hw);
+ if (ret_val)
+ return ret_val;
+
+ if (phy->autoneg_wait_to_complete) {
+ DEBUGOUT("Waiting for forced speed/duplex link on M88 phy.\n");
+
+ ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT,
+ 100000, &link);
+ if (ret_val)
+ return ret_val;
+
+ if (!link) {
+ bool reset_dsp = true;
+
+ switch (hw->phy.id) {
+ case I347AT4_E_PHY_ID:
+ case M88E1340M_E_PHY_ID:
+ case M88E1112_E_PHY_ID:
+ case M88E1543_E_PHY_ID:
+ case I210_I_PHY_ID:
+ reset_dsp = false;
+ break;
+ default:
+ if (hw->phy.type != e1000_phy_m88)
+ reset_dsp = false;
+ break;
+ }
+
+ if (!reset_dsp) {
+ DEBUGOUT("Link taking longer than expected.\n");
+ } else {
+ /* We didn't get link.
+ * Reset the DSP and cross our fingers.
+ */
+ ret_val = phy->ops.write_reg(hw,
+ M88E1000_PHY_PAGE_SELECT,
+ 0x001d);
+ if (ret_val)
+ return ret_val;
+ ret_val = e1000_phy_reset_dsp_generic(hw);
+ if (ret_val)
+ return ret_val;
+ }
+ }
+
+ /* Try once more */
+ ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT,
+ 100000, &link);
+ if (ret_val)
+ return ret_val;
+ }
+
+ if (hw->phy.type != e1000_phy_m88)
+ return E1000_SUCCESS;
+
+ if (hw->phy.id == I347AT4_E_PHY_ID ||
+ hw->phy.id == M88E1340M_E_PHY_ID ||
+ hw->phy.id == M88E1112_E_PHY_ID)
+ return E1000_SUCCESS;
+ if (hw->phy.id == I210_I_PHY_ID)
+ return E1000_SUCCESS;
+ if ((hw->phy.id == M88E1543_E_PHY_ID))
+ return E1000_SUCCESS;
+ ret_val = phy->ops.read_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, &phy_data);
+ if (ret_val)
+ return ret_val;
+
+ /* Resetting the phy means we need to re-force TX_CLK in the
+ * Extended PHY Specific Control Register to 25MHz clock from
+ * the reset value of 2.5MHz.
+ */
+ phy_data |= M88E1000_EPSCR_TX_CLK_25;
+ ret_val = phy->ops.write_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, phy_data);
+ if (ret_val)
+ return ret_val;
+
+ /* In addition, we must re-enable CRS on Tx for both half and full
+ * duplex.
+ */
+ ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data);
+ if (ret_val)
+ return ret_val;
+
+ phy_data |= M88E1000_PSCR_ASSERT_CRS_ON_TX;
+ ret_val = phy->ops.write_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data);
+
+ return ret_val;
+}
+
+/**
+ * e1000_phy_force_speed_duplex_ife - Force PHY speed & duplex
+ * @hw: pointer to the HW structure
+ *
+ * Forces the speed and duplex settings of the PHY.
+ * This is a function pointer entry point only called by
+ * PHY setup routines.
+ **/
+s32 e1000_phy_force_speed_duplex_ife(struct e1000_hw *hw)
+{
+ struct e1000_phy_info *phy = &hw->phy;
+ s32 ret_val;
+ u16 data;
+ bool link;
+
+ DEBUGFUNC("e1000_phy_force_speed_duplex_ife");
+
+ ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &data);
+ if (ret_val)
+ return ret_val;
+
+ e1000_phy_force_speed_duplex_setup(hw, &data);
+
+ ret_val = phy->ops.write_reg(hw, PHY_CONTROL, data);
+ if (ret_val)
+ return ret_val;
+
+ /* Disable MDI-X support for 10/100 */
+ ret_val = phy->ops.read_reg(hw, IFE_PHY_MDIX_CONTROL, &data);
+ if (ret_val)
+ return ret_val;
+
+ data &= ~IFE_PMC_AUTO_MDIX;
+ data &= ~IFE_PMC_FORCE_MDIX;
+
+ ret_val = phy->ops.write_reg(hw, IFE_PHY_MDIX_CONTROL, data);
+ if (ret_val)
+ return ret_val;
+
+ DEBUGOUT1("IFE PMC: %X\n", data);
+
+ usec_delay(1);
+
+ if (phy->autoneg_wait_to_complete) {
+ DEBUGOUT("Waiting for forced speed/duplex link on IFE phy.\n");
+
+ ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT,
+ 100000, &link);
+ if (ret_val)
+ return ret_val;
+
+ if (!link)
+ DEBUGOUT("Link taking longer than expected.\n");
+
+ /* Try once more */
+ ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT,
+ 100000, &link);
+ if (ret_val)
+ return ret_val;
+ }
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_phy_force_speed_duplex_setup - Configure forced PHY speed/duplex
+ * @hw: pointer to the HW structure
+ * @phy_ctrl: pointer to current value of PHY_CONTROL
+ *
+ * Forces speed and duplex on the PHY by doing the following: disable flow
+ * control, force speed/duplex on the MAC, disable auto speed detection,
+ * disable auto-negotiation, configure duplex, configure speed, configure
+ * the collision distance, write configuration to CTRL register. The
+ * caller must write to the PHY_CONTROL register for these settings to
+ * take affect.
+ **/
+void e1000_phy_force_speed_duplex_setup(struct e1000_hw *hw, u16 *phy_ctrl)
+{
+ struct e1000_mac_info *mac = &hw->mac;
+ u32 ctrl;
+
+ DEBUGFUNC("e1000_phy_force_speed_duplex_setup");
+
+ /* Turn off flow control when forcing speed/duplex */
+ hw->fc.current_mode = e1000_fc_none;
+
+ /* Force speed/duplex on the mac */
+ ctrl = E1000_READ_REG(hw, E1000_CTRL);
+ ctrl |= (E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX);
+ ctrl &= ~E1000_CTRL_SPD_SEL;
+
+ /* Disable Auto Speed Detection */
+ ctrl &= ~E1000_CTRL_ASDE;
+
+ /* Disable autoneg on the phy */
+ *phy_ctrl &= ~MII_CR_AUTO_NEG_EN;
+
+ /* Forcing Full or Half Duplex? */
+ if (mac->forced_speed_duplex & E1000_ALL_HALF_DUPLEX) {
+ ctrl &= ~E1000_CTRL_FD;
+ *phy_ctrl &= ~MII_CR_FULL_DUPLEX;
+ DEBUGOUT("Half Duplex\n");
+ } else {
+ ctrl |= E1000_CTRL_FD;
+ *phy_ctrl |= MII_CR_FULL_DUPLEX;
+ DEBUGOUT("Full Duplex\n");
+ }
+
+ /* Forcing 10mb or 100mb? */
+ if (mac->forced_speed_duplex & E1000_ALL_100_SPEED) {
+ ctrl |= E1000_CTRL_SPD_100;
+ *phy_ctrl |= MII_CR_SPEED_100;
+ *phy_ctrl &= ~MII_CR_SPEED_1000;
+ DEBUGOUT("Forcing 100mb\n");
+ } else {
+ ctrl &= ~(E1000_CTRL_SPD_1000 | E1000_CTRL_SPD_100);
+ *phy_ctrl &= ~(MII_CR_SPEED_1000 | MII_CR_SPEED_100);
+ DEBUGOUT("Forcing 10mb\n");
+ }
+
+ hw->mac.ops.config_collision_dist(hw);
+
+ E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
+}
+
+/**
+ * e1000_set_d3_lplu_state_generic - Sets low power link up state for D3
+ * @hw: pointer to the HW structure
+ * @active: boolean used to enable/disable lplu
+ *
+ * Success returns 0, Failure returns 1
+ *
+ * The low power link up (lplu) state is set to the power management level D3
+ * and SmartSpeed is disabled when active is true, else clear lplu for D3
+ * and enable Smartspeed. LPLU and Smartspeed are mutually exclusive. LPLU
+ * is used during Dx states where the power conservation is most important.
+ * During driver activity, SmartSpeed should be enabled so performance is
+ * maintained.
+ **/
+s32 e1000_set_d3_lplu_state_generic(struct e1000_hw *hw, bool active)
+{
+ struct e1000_phy_info *phy = &hw->phy;
+ s32 ret_val;
+ u16 data;
+
+ DEBUGFUNC("e1000_set_d3_lplu_state_generic");
+
+ if (!hw->phy.ops.read_reg)
+ return E1000_SUCCESS;
+
+ ret_val = phy->ops.read_reg(hw, IGP02E1000_PHY_POWER_MGMT, &data);
+ if (ret_val)
+ return ret_val;
+
+ if (!active) {
+ data &= ~IGP02E1000_PM_D3_LPLU;
+ ret_val = phy->ops.write_reg(hw, IGP02E1000_PHY_POWER_MGMT,
+ data);
+ if (ret_val)
+ return ret_val;
+ /* LPLU and SmartSpeed are mutually exclusive. LPLU is used
+ * during Dx states where the power conservation is most
+ * important. During driver activity we should enable
+ * SmartSpeed, so performance is maintained.
+ */
+ if (phy->smart_speed == e1000_smart_speed_on) {
+ ret_val = phy->ops.read_reg(hw,
+ IGP01E1000_PHY_PORT_CONFIG,
+ &data);
+ if (ret_val)
+ return ret_val;
+
+ data |= IGP01E1000_PSCFR_SMART_SPEED;
+ ret_val = phy->ops.write_reg(hw,
+ IGP01E1000_PHY_PORT_CONFIG,
+ data);
+ if (ret_val)
+ return ret_val;
+ } else if (phy->smart_speed == e1000_smart_speed_off) {
+ ret_val = phy->ops.read_reg(hw,
+ IGP01E1000_PHY_PORT_CONFIG,
+ &data);
+ if (ret_val)
+ return ret_val;
+
+ data &= ~IGP01E1000_PSCFR_SMART_SPEED;
+ ret_val = phy->ops.write_reg(hw,
+ IGP01E1000_PHY_PORT_CONFIG,
+ data);
+ if (ret_val)
+ return ret_val;
+ }
+ } else if ((phy->autoneg_advertised == E1000_ALL_SPEED_DUPLEX) ||
+ (phy->autoneg_advertised == E1000_ALL_NOT_GIG) ||
+ (phy->autoneg_advertised == E1000_ALL_10_SPEED)) {
+ data |= IGP02E1000_PM_D3_LPLU;
+ ret_val = phy->ops.write_reg(hw, IGP02E1000_PHY_POWER_MGMT,
+ data);
+ if (ret_val)
+ return ret_val;
+
+ /* When LPLU is enabled, we should disable SmartSpeed */
+ ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CONFIG,
+ &data);
+ if (ret_val)
+ return ret_val;
+
+ data &= ~IGP01E1000_PSCFR_SMART_SPEED;
+ ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CONFIG,
+ data);
+ }
+
+ return ret_val;
+}
+
+/**
+ * e1000_check_downshift_generic - Checks whether a downshift in speed occurred
+ * @hw: pointer to the HW structure
+ *
+ * Success returns 0, Failure returns 1
+ *
+ * A downshift is detected by querying the PHY link health.
+ **/
+s32 e1000_check_downshift_generic(struct e1000_hw *hw)
+{
+ struct e1000_phy_info *phy = &hw->phy;
+ s32 ret_val;
+ u16 phy_data, offset, mask;
+
+ DEBUGFUNC("e1000_check_downshift_generic");
+
+ switch (phy->type) {
+ case e1000_phy_i210:
+ case e1000_phy_m88:
+ case e1000_phy_gg82563:
+ offset = M88E1000_PHY_SPEC_STATUS;
+ mask = M88E1000_PSSR_DOWNSHIFT;
+ break;
+ case e1000_phy_igp_2:
+ case e1000_phy_igp_3:
+ offset = IGP01E1000_PHY_LINK_HEALTH;
+ mask = IGP01E1000_PLHR_SS_DOWNGRADE;
+ break;
+ default:
+ /* speed downshift not supported */
+ phy->speed_downgraded = false;
+ return E1000_SUCCESS;
+ }
+
+ ret_val = phy->ops.read_reg(hw, offset, &phy_data);
+
+ if (!ret_val)
+ phy->speed_downgraded = !!(phy_data & mask);
+
+ return ret_val;
+}
+
+/**
+ * e1000_check_polarity_m88 - Checks the polarity.
+ * @hw: pointer to the HW structure
+ *
+ * Success returns 0, Failure returns -E1000_ERR_PHY (-2)
+ *
+ * Polarity is determined based on the PHY specific status register.
+ **/
+s32 e1000_check_polarity_m88(struct e1000_hw *hw)
+{
+ struct e1000_phy_info *phy = &hw->phy;
+ s32 ret_val;
+ u16 data;
+
+ DEBUGFUNC("e1000_check_polarity_m88");
+
+ ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_STATUS, &data);
+
+ if (!ret_val)
+ phy->cable_polarity = ((data & M88E1000_PSSR_REV_POLARITY)
+ ? e1000_rev_polarity_reversed
+ : e1000_rev_polarity_normal);
+
+ return ret_val;
+}
+
+/**
+ * e1000_check_polarity_igp - Checks the polarity.
+ * @hw: pointer to the HW structure
+ *
+ * Success returns 0, Failure returns -E1000_ERR_PHY (-2)
+ *
+ * Polarity is determined based on the PHY port status register, and the
+ * current speed (since there is no polarity at 100Mbps).
+ **/
+s32 e1000_check_polarity_igp(struct e1000_hw *hw)
+{
+ struct e1000_phy_info *phy = &hw->phy;
+ s32 ret_val;
+ u16 data, offset, mask;
+
+ DEBUGFUNC("e1000_check_polarity_igp");
+
+ /* Polarity is determined based on the speed of
+ * our connection.
+ */
+ ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_STATUS, &data);
+ if (ret_val)
+ return ret_val;
+
+ if ((data & IGP01E1000_PSSR_SPEED_MASK) ==
+ IGP01E1000_PSSR_SPEED_1000MBPS) {
+ offset = IGP01E1000_PHY_PCS_INIT_REG;
+ mask = IGP01E1000_PHY_POLARITY_MASK;
+ } else {
+ /* This really only applies to 10Mbps since
+ * there is no polarity for 100Mbps (always 0).
+ */
+ offset = IGP01E1000_PHY_PORT_STATUS;
+ mask = IGP01E1000_PSSR_POLARITY_REVERSED;
+ }
+
+ ret_val = phy->ops.read_reg(hw, offset, &data);
+
+ if (!ret_val)
+ phy->cable_polarity = ((data & mask)
+ ? e1000_rev_polarity_reversed
+ : e1000_rev_polarity_normal);
+
+ return ret_val;
+}
+
+/**
+ * e1000_check_polarity_ife - Check cable polarity for IFE PHY
+ * @hw: pointer to the HW structure
+ *
+ * Polarity is determined on the polarity reversal feature being enabled.
+ **/
+s32 e1000_check_polarity_ife(struct e1000_hw *hw)
+{
+ struct e1000_phy_info *phy = &hw->phy;
+ s32 ret_val;
+ u16 phy_data, offset, mask;
+
+ DEBUGFUNC("e1000_check_polarity_ife");
+
+ /* Polarity is determined based on the reversal feature being enabled.
+ */
+ if (phy->polarity_correction) {
+ offset = IFE_PHY_EXTENDED_STATUS_CONTROL;
+ mask = IFE_PESC_POLARITY_REVERSED;
+ } else {
+ offset = IFE_PHY_SPECIAL_CONTROL;
+ mask = IFE_PSC_FORCE_POLARITY;
+ }
+
+ ret_val = phy->ops.read_reg(hw, offset, &phy_data);
+
+ if (!ret_val)
+ phy->cable_polarity = ((phy_data & mask)
+ ? e1000_rev_polarity_reversed
+ : e1000_rev_polarity_normal);
+
+ return ret_val;
+}
+
+/**
+ * e1000_wait_autoneg - Wait for auto-neg completion
+ * @hw: pointer to the HW structure
+ *
+ * Waits for auto-negotiation to complete or for the auto-negotiation time
+ * limit to expire, which ever happens first.
+ **/
+static s32 e1000_wait_autoneg(struct e1000_hw *hw)
+{
+ s32 ret_val = E1000_SUCCESS;
+ u16 i, phy_status;
+
+ DEBUGFUNC("e1000_wait_autoneg");
+
+ if (!hw->phy.ops.read_reg)
+ return E1000_SUCCESS;
+
+ /* Break after autoneg completes or PHY_AUTO_NEG_LIMIT expires. */
+ for (i = PHY_AUTO_NEG_LIMIT; i > 0; i--) {
+ ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status);
+ if (ret_val)
+ break;
+ ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status);
+ if (ret_val)
+ break;
+ if (phy_status & MII_SR_AUTONEG_COMPLETE)
+ break;
+ msec_delay(100);
+ }
+
+ /* PHY_AUTO_NEG_TIME expiration doesn't guarantee auto-negotiation
+ * has completed.
+ */
+ return ret_val;
+}
+
+/**
+ * e1000_phy_has_link_generic - Polls PHY for link
+ * @hw: pointer to the HW structure
+ * @iterations: number of times to poll for link
+ * @usec_interval: delay between polling attempts
+ * @success: pointer to whether polling was successful or not
+ *
+ * Polls the PHY status register for link, 'iterations' number of times.
+ **/
+s32 e1000_phy_has_link_generic(struct e1000_hw *hw, u32 iterations,
+ u32 usec_interval, bool *success)
+{
+ s32 ret_val = E1000_SUCCESS;
+ u16 i, phy_status;
+
+ DEBUGFUNC("e1000_phy_has_link_generic");
+
+ if (!hw->phy.ops.read_reg)
+ return E1000_SUCCESS;
+
+ for (i = 0; i < iterations; i++) {
+ /* Some PHYs require the PHY_STATUS register to be read
+ * twice due to the link bit being sticky. No harm doing
+ * it across the board.
+ */
+ ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status);
+ if (ret_val)
+ /* If the first read fails, another entity may have
+ * ownership of the resources, wait and try again to
+ * see if they have relinquished the resources yet.
+ */
+ usec_delay(usec_interval);
+ ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status);
+ if (ret_val)
+ break;
+ if (phy_status & MII_SR_LINK_STATUS)
+ break;
+ if (usec_interval >= 1000)
+ msec_delay_irq(usec_interval/1000);
+ else
+ usec_delay(usec_interval);
+ }
+
+ *success = (i < iterations);
+
+ return ret_val;
+}
+
+/**
+ * e1000_get_cable_length_m88 - Determine cable length for m88 PHY
+ * @hw: pointer to the HW structure
+ *
+ * Reads the PHY specific status register to retrieve the cable length
+ * information. The cable length is determined by averaging the minimum and
+ * maximum values to get the "average" cable length. The m88 PHY has four
+ * possible cable length values, which are:
+ * Register Value Cable Length
+ * 0 < 50 meters
+ * 1 50 - 80 meters
+ * 2 80 - 110 meters
+ * 3 110 - 140 meters
+ * 4 > 140 meters
+ **/
+s32 e1000_get_cable_length_m88(struct e1000_hw *hw)
+{
+ struct e1000_phy_info *phy = &hw->phy;
+ s32 ret_val;
+ u16 phy_data, index;
+
+ DEBUGFUNC("e1000_get_cable_length_m88");
+
+ ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_STATUS, &phy_data);
+ if (ret_val)
+ return ret_val;
+
+ index = ((phy_data & M88E1000_PSSR_CABLE_LENGTH) >>
+ M88E1000_PSSR_CABLE_LENGTH_SHIFT);
+
+ if (index >= M88E1000_CABLE_LENGTH_TABLE_SIZE - 1)
+ return -E1000_ERR_PHY;
+
+ phy->min_cable_length = e1000_m88_cable_length_table[index];
+ phy->max_cable_length = e1000_m88_cable_length_table[index + 1];
+
+ phy->cable_length = (phy->min_cable_length + phy->max_cable_length) / 2;
+
+ return E1000_SUCCESS;
+}
+
+s32 e1000_get_cable_length_m88_gen2(struct e1000_hw *hw)
+{
+ struct e1000_phy_info *phy = &hw->phy;
+ s32 ret_val;
+ u16 phy_data, phy_data2, is_cm;
+ u16 index, default_page;
+
+ DEBUGFUNC("e1000_get_cable_length_m88_gen2");
+
+ switch (hw->phy.id) {
+ case I210_I_PHY_ID:
+ /* Get cable length from PHY Cable Diagnostics Control Reg */
+ ret_val = phy->ops.read_reg(hw, (0x7 << GS40G_PAGE_SHIFT) +
+ (I347AT4_PCDL + phy->addr),
+ &phy_data);
+ if (ret_val)
+ return ret_val;
+
+ /* Check if the unit of cable length is meters or cm */
+ ret_val = phy->ops.read_reg(hw, (0x7 << GS40G_PAGE_SHIFT) +
+ I347AT4_PCDC, &phy_data2);
+ if (ret_val)
+ return ret_val;
+
+ is_cm = !(phy_data2 & I347AT4_PCDC_CABLE_LENGTH_UNIT);
+
+ /* Populate the phy structure with cable length in meters */
+ phy->min_cable_length = phy_data / (is_cm ? 100 : 1);
+ phy->max_cable_length = phy_data / (is_cm ? 100 : 1);
+ phy->cable_length = phy_data / (is_cm ? 100 : 1);
+ break;
+ case M88E1543_E_PHY_ID:
+ case M88E1340M_E_PHY_ID:
+ case I347AT4_E_PHY_ID:
+ /* Remember the original page select and set it to 7 */
+ ret_val = phy->ops.read_reg(hw, I347AT4_PAGE_SELECT,
+ &default_page);
+ if (ret_val)
+ return ret_val;
+
+ ret_val = phy->ops.write_reg(hw, I347AT4_PAGE_SELECT, 0x07);
+ if (ret_val)
+ return ret_val;
+
+ /* Get cable length from PHY Cable Diagnostics Control Reg */
+ ret_val = phy->ops.read_reg(hw, (I347AT4_PCDL + phy->addr),
+ &phy_data);
+ if (ret_val)
+ return ret_val;
+
+ /* Check if the unit of cable length is meters or cm */
+ ret_val = phy->ops.read_reg(hw, I347AT4_PCDC, &phy_data2);
+ if (ret_val)
+ return ret_val;
+
+ is_cm = !(phy_data2 & I347AT4_PCDC_CABLE_LENGTH_UNIT);
+
+ /* Populate the phy structure with cable length in meters */
+ phy->min_cable_length = phy_data / (is_cm ? 100 : 1);
+ phy->max_cable_length = phy_data / (is_cm ? 100 : 1);
+ phy->cable_length = phy_data / (is_cm ? 100 : 1);
+
+ /* Reset the page select to its original value */
+ ret_val = phy->ops.write_reg(hw, I347AT4_PAGE_SELECT,
+ default_page);
+ if (ret_val)
+ return ret_val;
+ break;
+
+ case M88E1112_E_PHY_ID:
+ /* Remember the original page select and set it to 5 */
+ ret_val = phy->ops.read_reg(hw, I347AT4_PAGE_SELECT,
+ &default_page);
+ if (ret_val)
+ return ret_val;
+
+ ret_val = phy->ops.write_reg(hw, I347AT4_PAGE_SELECT, 0x05);
+ if (ret_val)
+ return ret_val;
+
+ ret_val = phy->ops.read_reg(hw, M88E1112_VCT_DSP_DISTANCE,
+ &phy_data);
+ if (ret_val)
+ return ret_val;
+
+ index = (phy_data & M88E1000_PSSR_CABLE_LENGTH) >>
+ M88E1000_PSSR_CABLE_LENGTH_SHIFT;
+
+ if (index >= M88E1000_CABLE_LENGTH_TABLE_SIZE - 1)
+ return -E1000_ERR_PHY;
+
+ phy->min_cable_length = e1000_m88_cable_length_table[index];
+ phy->max_cable_length = e1000_m88_cable_length_table[index + 1];
+
+ phy->cable_length = (phy->min_cable_length +
+ phy->max_cable_length) / 2;
+
+ /* Reset the page select to its original value */
+ ret_val = phy->ops.write_reg(hw, I347AT4_PAGE_SELECT,
+ default_page);
+ if (ret_val)
+ return ret_val;
+
+ break;
+ default:
+ return -E1000_ERR_PHY;
+ }
+
+ return ret_val;
+}
+
+/**
+ * e1000_get_cable_length_igp_2 - Determine cable length for igp2 PHY
+ * @hw: pointer to the HW structure
+ *
+ * The automatic gain control (agc) normalizes the amplitude of the
+ * received signal, adjusting for the attenuation produced by the
+ * cable. By reading the AGC registers, which represent the
+ * combination of coarse and fine gain value, the value can be put
+ * into a lookup table to obtain the approximate cable length
+ * for each channel.
+ **/
+s32 e1000_get_cable_length_igp_2(struct e1000_hw *hw)
+{
+ struct e1000_phy_info *phy = &hw->phy;
+ s32 ret_val;
+ u16 phy_data, i, agc_value = 0;
+ u16 cur_agc_index, max_agc_index = 0;
+ u16 min_agc_index = IGP02E1000_CABLE_LENGTH_TABLE_SIZE - 1;
+ static const u16 agc_reg_array[IGP02E1000_PHY_CHANNEL_NUM] = {
+ IGP02E1000_PHY_AGC_A,
+ IGP02E1000_PHY_AGC_B,
+ IGP02E1000_PHY_AGC_C,
+ IGP02E1000_PHY_AGC_D
+ };
+
+ DEBUGFUNC("e1000_get_cable_length_igp_2");
+
+ /* Read the AGC registers for all channels */
+ for (i = 0; i < IGP02E1000_PHY_CHANNEL_NUM; i++) {
+ ret_val = phy->ops.read_reg(hw, agc_reg_array[i], &phy_data);
+ if (ret_val)
+ return ret_val;
+
+ /* Getting bits 15:9, which represent the combination of
+ * coarse and fine gain values. The result is a number
+ * that can be put into the lookup table to obtain the
+ * approximate cable length.
+ */
+ cur_agc_index = ((phy_data >> IGP02E1000_AGC_LENGTH_SHIFT) &
+ IGP02E1000_AGC_LENGTH_MASK);
+
+ /* Array index bound check. */
+ if ((cur_agc_index >= IGP02E1000_CABLE_LENGTH_TABLE_SIZE) ||
+ (cur_agc_index == 0))
+ return -E1000_ERR_PHY;
+
+ /* Remove min & max AGC values from calculation. */
+ if (e1000_igp_2_cable_length_table[min_agc_index] >
+ e1000_igp_2_cable_length_table[cur_agc_index])
+ min_agc_index = cur_agc_index;
+ if (e1000_igp_2_cable_length_table[max_agc_index] <
+ e1000_igp_2_cable_length_table[cur_agc_index])
+ max_agc_index = cur_agc_index;
+
+ agc_value += e1000_igp_2_cable_length_table[cur_agc_index];
+ }
+
+ agc_value -= (e1000_igp_2_cable_length_table[min_agc_index] +
+ e1000_igp_2_cable_length_table[max_agc_index]);
+ agc_value /= (IGP02E1000_PHY_CHANNEL_NUM - 2);
+
+ /* Calculate cable length with the error range of +/- 10 meters. */
+ phy->min_cable_length = (((agc_value - IGP02E1000_AGC_RANGE) > 0) ?
+ (agc_value - IGP02E1000_AGC_RANGE) : 0);
+ phy->max_cable_length = agc_value + IGP02E1000_AGC_RANGE;
+
+ phy->cable_length = (phy->min_cable_length + phy->max_cable_length) / 2;
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_get_phy_info_m88 - Retrieve PHY information
+ * @hw: pointer to the HW structure
+ *
+ * Valid for only copper links. Read the PHY status register (sticky read)
+ * to verify that link is up. Read the PHY special control register to
+ * determine the polarity and 10base-T extended distance. Read the PHY
+ * special status register to determine MDI/MDIx and current speed. If
+ * speed is 1000, then determine cable length, local and remote receiver.
+ **/
+s32 e1000_get_phy_info_m88(struct e1000_hw *hw)
+{
+ struct e1000_phy_info *phy = &hw->phy;
+ s32 ret_val;
+ u16 phy_data;
+ bool link;
+
+ DEBUGFUNC("e1000_get_phy_info_m88");
+
+ if (phy->media_type != e1000_media_type_copper) {
+ DEBUGOUT("Phy info is only valid for copper media\n");
+ return -E1000_ERR_CONFIG;
+ }
+
+ ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link);
+ if (ret_val)
+ return ret_val;
+
+ if (!link) {
+ DEBUGOUT("Phy info is only valid if link is up\n");
+ return -E1000_ERR_CONFIG;
+ }
+
+ ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data);
+ if (ret_val)
+ return ret_val;
+
+ phy->polarity_correction = !!(phy_data &
+ M88E1000_PSCR_POLARITY_REVERSAL);
+
+ ret_val = e1000_check_polarity_m88(hw);
+ if (ret_val)
+ return ret_val;
+
+ ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_STATUS, &phy_data);
+ if (ret_val)
+ return ret_val;
+
+ phy->is_mdix = !!(phy_data & M88E1000_PSSR_MDIX);
+
+ if ((phy_data & M88E1000_PSSR_SPEED) == M88E1000_PSSR_1000MBS) {
+ ret_val = hw->phy.ops.get_cable_length(hw);
+ if (ret_val)
+ return ret_val;
+
+ ret_val = phy->ops.read_reg(hw, PHY_1000T_STATUS, &phy_data);
+ if (ret_val)
+ return ret_val;
+
+ phy->local_rx = (phy_data & SR_1000T_LOCAL_RX_STATUS)
+ ? e1000_1000t_rx_status_ok
+ : e1000_1000t_rx_status_not_ok;
+
+ phy->remote_rx = (phy_data & SR_1000T_REMOTE_RX_STATUS)
+ ? e1000_1000t_rx_status_ok
+ : e1000_1000t_rx_status_not_ok;
+ } else {
+ /* Set values to "undefined" */
+ phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED;
+ phy->local_rx = e1000_1000t_rx_status_undefined;
+ phy->remote_rx = e1000_1000t_rx_status_undefined;
+ }
+
+ return ret_val;
+}
+
+/**
+ * e1000_get_phy_info_igp - Retrieve igp PHY information
+ * @hw: pointer to the HW structure
+ *
+ * Read PHY status to determine if link is up. If link is up, then
+ * set/determine 10base-T extended distance and polarity correction. Read
+ * PHY port status to determine MDI/MDIx and speed. Based on the speed,
+ * determine on the cable length, local and remote receiver.
+ **/
+s32 e1000_get_phy_info_igp(struct e1000_hw *hw)
+{
+ struct e1000_phy_info *phy = &hw->phy;
+ s32 ret_val;
+ u16 data;
+ bool link;
+
+ DEBUGFUNC("e1000_get_phy_info_igp");
+
+ ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link);
+ if (ret_val)
+ return ret_val;
+
+ if (!link) {
+ DEBUGOUT("Phy info is only valid if link is up\n");
+ return -E1000_ERR_CONFIG;
+ }
+
+ phy->polarity_correction = true;
+
+ ret_val = e1000_check_polarity_igp(hw);
+ if (ret_val)
+ return ret_val;
+
+ ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_STATUS, &data);
+ if (ret_val)
+ return ret_val;
+
+ phy->is_mdix = !!(data & IGP01E1000_PSSR_MDIX);
+
+ if ((data & IGP01E1000_PSSR_SPEED_MASK) ==
+ IGP01E1000_PSSR_SPEED_1000MBPS) {
+ ret_val = phy->ops.get_cable_length(hw);
+ if (ret_val)
+ return ret_val;
+
+ ret_val = phy->ops.read_reg(hw, PHY_1000T_STATUS, &data);
+ if (ret_val)
+ return ret_val;
+
+ phy->local_rx = (data & SR_1000T_LOCAL_RX_STATUS)
+ ? e1000_1000t_rx_status_ok
+ : e1000_1000t_rx_status_not_ok;
+
+ phy->remote_rx = (data & SR_1000T_REMOTE_RX_STATUS)
+ ? e1000_1000t_rx_status_ok
+ : e1000_1000t_rx_status_not_ok;
+ } else {
+ phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED;
+ phy->local_rx = e1000_1000t_rx_status_undefined;
+ phy->remote_rx = e1000_1000t_rx_status_undefined;
+ }
+
+ return ret_val;
+}
+
+/**
+ * e1000_get_phy_info_ife - Retrieves various IFE PHY states
+ * @hw: pointer to the HW structure
+ *
+ * Populates "phy" structure with various feature states.
+ **/
+s32 e1000_get_phy_info_ife(struct e1000_hw *hw)
+{
+ struct e1000_phy_info *phy = &hw->phy;
+ s32 ret_val;
+ u16 data;
+ bool link;
+
+ DEBUGFUNC("e1000_get_phy_info_ife");
+
+ ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link);
+ if (ret_val)
+ return ret_val;
+
+ if (!link) {
+ DEBUGOUT("Phy info is only valid if link is up\n");
+ return -E1000_ERR_CONFIG;
+ }
+
+ ret_val = phy->ops.read_reg(hw, IFE_PHY_SPECIAL_CONTROL, &data);
+ if (ret_val)
+ return ret_val;
+ phy->polarity_correction = !(data & IFE_PSC_AUTO_POLARITY_DISABLE);
+
+ if (phy->polarity_correction) {
+ ret_val = e1000_check_polarity_ife(hw);
+ if (ret_val)
+ return ret_val;
+ } else {
+ /* Polarity is forced */
+ phy->cable_polarity = ((data & IFE_PSC_FORCE_POLARITY)
+ ? e1000_rev_polarity_reversed
+ : e1000_rev_polarity_normal);
+ }
+
+ ret_val = phy->ops.read_reg(hw, IFE_PHY_MDIX_CONTROL, &data);
+ if (ret_val)
+ return ret_val;
+
+ phy->is_mdix = !!(data & IFE_PMC_MDIX_STATUS);
+
+ /* The following parameters are undefined for 10/100 operation. */
+ phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED;
+ phy->local_rx = e1000_1000t_rx_status_undefined;
+ phy->remote_rx = e1000_1000t_rx_status_undefined;
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_phy_sw_reset_generic - PHY software reset
+ * @hw: pointer to the HW structure
+ *
+ * Does a software reset of the PHY by reading the PHY control register and
+ * setting/write the control register reset bit to the PHY.
+ **/
+s32 e1000_phy_sw_reset_generic(struct e1000_hw *hw)
+{
+ s32 ret_val;
+ u16 phy_ctrl;
+
+ DEBUGFUNC("e1000_phy_sw_reset_generic");
+
+ if (!hw->phy.ops.read_reg)
+ return E1000_SUCCESS;
+
+ ret_val = hw->phy.ops.read_reg(hw, PHY_CONTROL, &phy_ctrl);
+ if (ret_val)
+ return ret_val;
+
+ phy_ctrl |= MII_CR_RESET;
+ ret_val = hw->phy.ops.write_reg(hw, PHY_CONTROL, phy_ctrl);
+ if (ret_val)
+ return ret_val;
+
+ usec_delay(1);
+
+ return ret_val;
+}
+
+/**
+ * e1000_phy_hw_reset_generic - PHY hardware reset
+ * @hw: pointer to the HW structure
+ *
+ * Verify the reset block is not blocking us from resetting. Acquire
+ * semaphore (if necessary) and read/set/write the device control reset
+ * bit in the PHY. Wait the appropriate delay time for the device to
+ * reset and release the semaphore (if necessary).
+ **/
+s32 e1000_phy_hw_reset_generic(struct e1000_hw *hw)
+{
+ struct e1000_phy_info *phy = &hw->phy;
+ s32 ret_val;
+ u32 ctrl;
+
+ DEBUGFUNC("e1000_phy_hw_reset_generic");
+
+ if (phy->ops.check_reset_block) {
+ ret_val = phy->ops.check_reset_block(hw);
+ if (ret_val)
+ return E1000_SUCCESS;
+ }
+
+ ret_val = phy->ops.acquire(hw);
+ if (ret_val)
+ return ret_val;
+
+ ctrl = E1000_READ_REG(hw, E1000_CTRL);
+ E1000_WRITE_REG(hw, E1000_CTRL, ctrl | E1000_CTRL_PHY_RST);
+ E1000_WRITE_FLUSH(hw);
+
+ usec_delay(phy->reset_delay_us);
+
+ E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
+ E1000_WRITE_FLUSH(hw);
+
+ usec_delay(150);
+
+ phy->ops.release(hw);
+
+ return phy->ops.get_cfg_done(hw);
+}
+
+/**
+ * e1000_get_cfg_done_generic - Generic configuration done
+ * @hw: pointer to the HW structure
+ *
+ * Generic function to wait 10 milli-seconds for configuration to complete
+ * and return success.
+ **/
+s32 e1000_get_cfg_done_generic(struct e1000_hw E1000_UNUSEDARG *hw)
+{
+ DEBUGFUNC("e1000_get_cfg_done_generic");
+
+ msec_delay_irq(10);
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_phy_init_script_igp3 - Inits the IGP3 PHY
+ * @hw: pointer to the HW structure
+ *
+ * Initializes a Intel Gigabit PHY3 when an EEPROM is not present.
+ **/
+s32 e1000_phy_init_script_igp3(struct e1000_hw *hw)
+{
+ DEBUGOUT("Running IGP 3 PHY init script\n");
+
+ /* PHY init IGP 3 */
+ /* Enable rise/fall, 10-mode work in class-A */
+ hw->phy.ops.write_reg(hw, 0x2F5B, 0x9018);
+ /* Remove all caps from Replica path filter */
+ hw->phy.ops.write_reg(hw, 0x2F52, 0x0000);
+ /* Bias trimming for ADC, AFE and Driver (Default) */
+ hw->phy.ops.write_reg(hw, 0x2FB1, 0x8B24);
+ /* Increase Hybrid poly bias */
+ hw->phy.ops.write_reg(hw, 0x2FB2, 0xF8F0);
+ /* Add 4% to Tx amplitude in Gig mode */
+ hw->phy.ops.write_reg(hw, 0x2010, 0x10B0);
+ /* Disable trimming (TTT) */
+ hw->phy.ops.write_reg(hw, 0x2011, 0x0000);
+ /* Poly DC correction to 94.6% + 2% for all channels */
+ hw->phy.ops.write_reg(hw, 0x20DD, 0x249A);
+ /* ABS DC correction to 95.9% */
+ hw->phy.ops.write_reg(hw, 0x20DE, 0x00D3);
+ /* BG temp curve trim */
+ hw->phy.ops.write_reg(hw, 0x28B4, 0x04CE);
+ /* Increasing ADC OPAMP stage 1 currents to max */
+ hw->phy.ops.write_reg(hw, 0x2F70, 0x29E4);
+ /* Force 1000 ( required for enabling PHY regs configuration) */
+ hw->phy.ops.write_reg(hw, 0x0000, 0x0140);
+ /* Set upd_freq to 6 */
+ hw->phy.ops.write_reg(hw, 0x1F30, 0x1606);
+ /* Disable NPDFE */
+ hw->phy.ops.write_reg(hw, 0x1F31, 0xB814);
+ /* Disable adaptive fixed FFE (Default) */
+ hw->phy.ops.write_reg(hw, 0x1F35, 0x002A);
+ /* Enable FFE hysteresis */
+ hw->phy.ops.write_reg(hw, 0x1F3E, 0x0067);
+ /* Fixed FFE for short cable lengths */
+ hw->phy.ops.write_reg(hw, 0x1F54, 0x0065);
+ /* Fixed FFE for medium cable lengths */
+ hw->phy.ops.write_reg(hw, 0x1F55, 0x002A);
+ /* Fixed FFE for long cable lengths */
+ hw->phy.ops.write_reg(hw, 0x1F56, 0x002A);
+ /* Enable Adaptive Clip Threshold */
+ hw->phy.ops.write_reg(hw, 0x1F72, 0x3FB0);
+ /* AHT reset limit to 1 */
+ hw->phy.ops.write_reg(hw, 0x1F76, 0xC0FF);
+ /* Set AHT master delay to 127 msec */
+ hw->phy.ops.write_reg(hw, 0x1F77, 0x1DEC);
+ /* Set scan bits for AHT */
+ hw->phy.ops.write_reg(hw, 0x1F78, 0xF9EF);
+ /* Set AHT Preset bits */
+ hw->phy.ops.write_reg(hw, 0x1F79, 0x0210);
+ /* Change integ_factor of channel A to 3 */
+ hw->phy.ops.write_reg(hw, 0x1895, 0x0003);
+ /* Change prop_factor of channels BCD to 8 */
+ hw->phy.ops.write_reg(hw, 0x1796, 0x0008);
+ /* Change cg_icount + enable integbp for channels BCD */
+ hw->phy.ops.write_reg(hw, 0x1798, 0xD008);
+ /* Change cg_icount + enable integbp + change prop_factor_master
+ * to 8 for channel A
+ */
+ hw->phy.ops.write_reg(hw, 0x1898, 0xD918);
+ /* Disable AHT in Slave mode on channel A */
+ hw->phy.ops.write_reg(hw, 0x187A, 0x0800);
+ /* Enable LPLU and disable AN to 1000 in non-D0a states,
+ * Enable SPD+B2B
+ */
+ hw->phy.ops.write_reg(hw, 0x0019, 0x008D);
+ /* Enable restart AN on an1000_dis change */
+ hw->phy.ops.write_reg(hw, 0x001B, 0x2080);
+ /* Enable wh_fifo read clock in 10/100 modes */
+ hw->phy.ops.write_reg(hw, 0x0014, 0x0045);
+ /* Restart AN, Speed selection is 1000 */
+ hw->phy.ops.write_reg(hw, 0x0000, 0x1340);
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_get_phy_type_from_id - Get PHY type from id
+ * @phy_id: phy_id read from the phy
+ *
+ * Returns the phy type from the id.
+ **/
+enum e1000_phy_type e1000_get_phy_type_from_id(u32 phy_id)
+{
+ enum e1000_phy_type phy_type = e1000_phy_unknown;
+
+ switch (phy_id) {
+ case M88E1000_I_PHY_ID:
+ case M88E1000_E_PHY_ID:
+ case M88E1111_I_PHY_ID:
+ case M88E1011_I_PHY_ID:
+ case M88E1543_E_PHY_ID:
+ case I347AT4_E_PHY_ID:
+ case M88E1112_E_PHY_ID:
+ case M88E1340M_E_PHY_ID:
+ phy_type = e1000_phy_m88;
+ break;
+ case IGP01E1000_I_PHY_ID: /* IGP 1 & 2 share this */
+ phy_type = e1000_phy_igp_2;
+ break;
+ case GG82563_E_PHY_ID:
+ phy_type = e1000_phy_gg82563;
+ break;
+ case IGP03E1000_E_PHY_ID:
+ phy_type = e1000_phy_igp_3;
+ break;
+ case IFE_E_PHY_ID:
+ case IFE_PLUS_E_PHY_ID:
+ case IFE_C_E_PHY_ID:
+ phy_type = e1000_phy_ife;
+ break;
+ case I82580_I_PHY_ID:
+ phy_type = e1000_phy_82580;
+ break;
+ case I210_I_PHY_ID:
+ phy_type = e1000_phy_i210;
+ break;
+ default:
+ phy_type = e1000_phy_unknown;
+ break;
+ }
+ return phy_type;
+}
+
+/**
+ * e1000_determine_phy_address - Determines PHY address.
+ * @hw: pointer to the HW structure
+ *
+ * This uses a trial and error method to loop through possible PHY
+ * addresses. It tests each by reading the PHY ID registers and
+ * checking for a match.
+ **/
+s32 e1000_determine_phy_address(struct e1000_hw *hw)
+{
+ u32 phy_addr = 0;
+ u32 i;
+ enum e1000_phy_type phy_type = e1000_phy_unknown;
+
+ hw->phy.id = phy_type;
+
+ for (phy_addr = 0; phy_addr < E1000_MAX_PHY_ADDR; phy_addr++) {
+ hw->phy.addr = phy_addr;
+ i = 0;
+
+ do {
+ e1000_get_phy_id(hw);
+ phy_type = e1000_get_phy_type_from_id(hw->phy.id);
+
+ /* If phy_type is valid, break - we found our
+ * PHY address
+ */
+ if (phy_type != e1000_phy_unknown)
+ return E1000_SUCCESS;
+
+ msec_delay(1);
+ i++;
+ } while (i < 10);
+ }
+
+ return -E1000_ERR_PHY_TYPE;
+}
+
+/**
+ * e1000_power_up_phy_copper - Restore copper link in case of PHY power down
+ * @hw: pointer to the HW structure
+ *
+ * In the case of a PHY power down to save power, or to turn off link during a
+ * driver unload, or wake on lan is not enabled, restore the link to previous
+ * settings.
+ **/
+void e1000_power_up_phy_copper(struct e1000_hw *hw)
+{
+ u16 mii_reg = 0;
+ u16 power_reg = 0;
+
+ /* The PHY will retain its settings across a power down/up cycle */
+ hw->phy.ops.read_reg(hw, PHY_CONTROL, &mii_reg);
+ mii_reg &= ~MII_CR_POWER_DOWN;
+ if (hw->phy.type == e1000_phy_i210) {
+ hw->phy.ops.read_reg(hw, GS40G_COPPER_SPEC, &power_reg);
+ power_reg &= ~GS40G_CS_POWER_DOWN;
+ hw->phy.ops.write_reg(hw, GS40G_COPPER_SPEC, power_reg);
+ }
+ hw->phy.ops.write_reg(hw, PHY_CONTROL, mii_reg);
+}
+
+/**
+ * e1000_power_down_phy_copper - Restore copper link in case of PHY power down
+ * @hw: pointer to the HW structure
+ *
+ * In the case of a PHY power down to save power, or to turn off link during a
+ * driver unload, or wake on lan is not enabled, restore the link to previous
+ * settings.
+ **/
+void e1000_power_down_phy_copper(struct e1000_hw *hw)
+{
+ u16 mii_reg = 0;
+ u16 power_reg = 0;
+
+ /* The PHY will retain its settings across a power down/up cycle */
+ hw->phy.ops.read_reg(hw, PHY_CONTROL, &mii_reg);
+ mii_reg |= MII_CR_POWER_DOWN;
+ /* i210 Phy requires an additional bit for power up/down */
+ if (hw->phy.type == e1000_phy_i210) {
+ hw->phy.ops.read_reg(hw, GS40G_COPPER_SPEC, &power_reg);
+ power_reg |= GS40G_CS_POWER_DOWN;
+ hw->phy.ops.write_reg(hw, GS40G_COPPER_SPEC, power_reg);
+ }
+ hw->phy.ops.write_reg(hw, PHY_CONTROL, mii_reg);
+ msec_delay(1);
+}
+
+/**
+ * e1000_check_polarity_82577 - Checks the polarity.
+ * @hw: pointer to the HW structure
+ *
+ * Success returns 0, Failure returns -E1000_ERR_PHY (-2)
+ *
+ * Polarity is determined based on the PHY specific status register.
+ **/
+s32 e1000_check_polarity_82577(struct e1000_hw *hw)
+{
+ struct e1000_phy_info *phy = &hw->phy;
+ s32 ret_val;
+ u16 data;
+
+ DEBUGFUNC("e1000_check_polarity_82577");
+
+ ret_val = phy->ops.read_reg(hw, I82577_PHY_STATUS_2, &data);
+
+ if (!ret_val)
+ phy->cable_polarity = ((data & I82577_PHY_STATUS2_REV_POLARITY)
+ ? e1000_rev_polarity_reversed
+ : e1000_rev_polarity_normal);
+
+ return ret_val;
+}
+
+/**
+ * e1000_phy_force_speed_duplex_82577 - Force speed/duplex for I82577 PHY
+ * @hw: pointer to the HW structure
+ *
+ * Calls the PHY setup function to force speed and duplex.
+ **/
+s32 e1000_phy_force_speed_duplex_82577(struct e1000_hw *hw)
+{
+ struct e1000_phy_info *phy = &hw->phy;
+ s32 ret_val;
+ u16 phy_data;
+ bool link;
+
+ DEBUGFUNC("e1000_phy_force_speed_duplex_82577");
+
+ ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &phy_data);
+ if (ret_val)
+ return ret_val;
+
+ e1000_phy_force_speed_duplex_setup(hw, &phy_data);
+
+ ret_val = phy->ops.write_reg(hw, PHY_CONTROL, phy_data);
+ if (ret_val)
+ return ret_val;
+
+ usec_delay(1);
+
+ if (phy->autoneg_wait_to_complete) {
+ DEBUGOUT("Waiting for forced speed/duplex link on 82577 phy\n");
+
+ ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT,
+ 100000, &link);
+ if (ret_val)
+ return ret_val;
+
+ if (!link)
+ DEBUGOUT("Link taking longer than expected.\n");
+
+ /* Try once more */
+ ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT,
+ 100000, &link);
+ }
+
+ return ret_val;
+}
+
+/**
+ * e1000_get_phy_info_82577 - Retrieve I82577 PHY information
+ * @hw: pointer to the HW structure
+ *
+ * Read PHY status to determine if link is up. If link is up, then
+ * set/determine 10base-T extended distance and polarity correction. Read
+ * PHY port status to determine MDI/MDIx and speed. Based on the speed,
+ * determine on the cable length, local and remote receiver.
+ **/
+s32 e1000_get_phy_info_82577(struct e1000_hw *hw)
+{
+ struct e1000_phy_info *phy = &hw->phy;
+ s32 ret_val;
+ u16 data;
+ bool link;
+
+ DEBUGFUNC("e1000_get_phy_info_82577");
+
+ ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link);
+ if (ret_val)
+ return ret_val;
+
+ if (!link) {
+ DEBUGOUT("Phy info is only valid if link is up\n");
+ return -E1000_ERR_CONFIG;
+ }
+
+ phy->polarity_correction = true;
+
+ ret_val = e1000_check_polarity_82577(hw);
+ if (ret_val)
+ return ret_val;
+
+ ret_val = phy->ops.read_reg(hw, I82577_PHY_STATUS_2, &data);
+ if (ret_val)
+ return ret_val;
+
+ phy->is_mdix = !!(data & I82577_PHY_STATUS2_MDIX);
+
+ if ((data & I82577_PHY_STATUS2_SPEED_MASK) ==
+ I82577_PHY_STATUS2_SPEED_1000MBPS) {
+ ret_val = hw->phy.ops.get_cable_length(hw);
+ if (ret_val)
+ return ret_val;
+
+ ret_val = phy->ops.read_reg(hw, PHY_1000T_STATUS, &data);
+ if (ret_val)
+ return ret_val;
+
+ phy->local_rx = (data & SR_1000T_LOCAL_RX_STATUS)
+ ? e1000_1000t_rx_status_ok
+ : e1000_1000t_rx_status_not_ok;
+
+ phy->remote_rx = (data & SR_1000T_REMOTE_RX_STATUS)
+ ? e1000_1000t_rx_status_ok
+ : e1000_1000t_rx_status_not_ok;
+ } else {
+ phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED;
+ phy->local_rx = e1000_1000t_rx_status_undefined;
+ phy->remote_rx = e1000_1000t_rx_status_undefined;
+ }
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_get_cable_length_82577 - Determine cable length for 82577 PHY
+ * @hw: pointer to the HW structure
+ *
+ * Reads the diagnostic status register and verifies result is valid before
+ * placing it in the phy_cable_length field.
+ **/
+s32 e1000_get_cable_length_82577(struct e1000_hw *hw)
+{
+ struct e1000_phy_info *phy = &hw->phy;
+ s32 ret_val;
+ u16 phy_data, length;
+
+ DEBUGFUNC("e1000_get_cable_length_82577");
+
+ ret_val = phy->ops.read_reg(hw, I82577_PHY_DIAG_STATUS, &phy_data);
+ if (ret_val)
+ return ret_val;
+
+ length = ((phy_data & I82577_DSTATUS_CABLE_LENGTH) >>
+ I82577_DSTATUS_CABLE_LENGTH_SHIFT);
+
+ if (length == E1000_CABLE_LENGTH_UNDEFINED)
+ return -E1000_ERR_PHY;
+
+ phy->cable_length = length;
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_write_phy_reg_gs40g - Write GS40G PHY register
+ * @hw: pointer to the HW structure
+ * @offset: register offset to write to
+ * @data: data to write at register offset
+ *
+ * Acquires semaphore, if necessary, then writes the data to PHY register
+ * at the offset. Release any acquired semaphores before exiting.
+ **/
+s32 e1000_write_phy_reg_gs40g(struct e1000_hw *hw, u32 offset, u16 data)
+{
+ s32 ret_val;
+ u16 page = offset >> GS40G_PAGE_SHIFT;
+
+ DEBUGFUNC("e1000_write_phy_reg_gs40g");
+
+ offset = offset & GS40G_OFFSET_MASK;
+ ret_val = hw->phy.ops.acquire(hw);
+ if (ret_val)
+ return ret_val;
+
+ ret_val = e1000_write_phy_reg_mdic(hw, GS40G_PAGE_SELECT, page);
+ if (ret_val)
+ goto release;
+ ret_val = e1000_write_phy_reg_mdic(hw, offset, data);
+
+release:
+ hw->phy.ops.release(hw);
+ return ret_val;
+}
+
+/**
+ * e1000_read_phy_reg_gs40g - Read GS40G PHY register
+ * @hw: pointer to the HW structure
+ * @offset: lower half is register offset to read to
+ * upper half is page to use.
+ * @data: data to read at register offset
+ *
+ * Acquires semaphore, if necessary, then reads the data in the PHY register
+ * at the offset. Release any acquired semaphores before exiting.
+ **/
+s32 e1000_read_phy_reg_gs40g(struct e1000_hw *hw, u32 offset, u16 *data)
+{
+ s32 ret_val;
+ u16 page = offset >> GS40G_PAGE_SHIFT;
+
+ DEBUGFUNC("e1000_read_phy_reg_gs40g");
+
+ offset = offset & GS40G_OFFSET_MASK;
+ ret_val = hw->phy.ops.acquire(hw);
+ if (ret_val)
+ return ret_val;
+
+ ret_val = e1000_write_phy_reg_mdic(hw, GS40G_PAGE_SELECT, page);
+ if (ret_val)
+ goto release;
+ ret_val = e1000_read_phy_reg_mdic(hw, offset, data);
+
+release:
+ hw->phy.ops.release(hw);
+ return ret_val;
+}
+
+/**
+ * e1000_read_phy_reg_mphy - Read mPHY control register
+ * @hw: pointer to the HW structure
+ * @address: address to be read
+ * @data: pointer to the read data
+ *
+ * Reads the mPHY control register in the PHY at offset and stores the
+ * information read to data.
+ **/
+s32 e1000_read_phy_reg_mphy(struct e1000_hw *hw, u32 address, u32 *data)
+{
+ u32 mphy_ctrl = 0;
+ bool locked = false;
+ bool ready = false;
+
+ DEBUGFUNC("e1000_read_phy_reg_mphy");
+
+ /* Check if mPHY is ready to read/write operations */
+ ready = e1000_is_mphy_ready(hw);
+ if (!ready)
+ return -E1000_ERR_PHY;
+
+ /* Check if mPHY access is disabled and enable it if so */
+ mphy_ctrl = E1000_READ_REG(hw, E1000_MPHY_ADDR_CTRL);
+ if (mphy_ctrl & E1000_MPHY_DIS_ACCESS) {
+ locked = true;
+ ready = e1000_is_mphy_ready(hw);
+ if (!ready)
+ return -E1000_ERR_PHY;
+ mphy_ctrl |= E1000_MPHY_ENA_ACCESS;
+ E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL, mphy_ctrl);
+ }
+
+ /* Set the address that we want to read */
+ ready = e1000_is_mphy_ready(hw);
+ if (!ready)
+ return -E1000_ERR_PHY;
+
+ /* We mask address, because we want to use only current lane */
+ mphy_ctrl = (mphy_ctrl & ~E1000_MPHY_ADDRESS_MASK &
+ ~E1000_MPHY_ADDRESS_FNC_OVERRIDE) |
+ (address & E1000_MPHY_ADDRESS_MASK);
+ E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL, mphy_ctrl);
+
+ /* Read data from the address */
+ ready = e1000_is_mphy_ready(hw);
+ if (!ready)
+ return -E1000_ERR_PHY;
+ *data = E1000_READ_REG(hw, E1000_MPHY_DATA);
+
+ /* Disable access to mPHY if it was originally disabled */
+ if (locked)
+ ready = e1000_is_mphy_ready(hw);
+ if (!ready)
+ return -E1000_ERR_PHY;
+ E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL,
+ E1000_MPHY_DIS_ACCESS);
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_write_phy_reg_mphy - Write mPHY control register
+ * @hw: pointer to the HW structure
+ * @address: address to write to
+ * @data: data to write to register at offset
+ * @line_override: used when we want to use different line than default one
+ *
+ * Writes data to mPHY control register.
+ **/
+s32 e1000_write_phy_reg_mphy(struct e1000_hw *hw, u32 address, u32 data,
+ bool line_override)
+{
+ u32 mphy_ctrl = 0;
+ bool locked = false;
+ bool ready = false;
+
+ DEBUGFUNC("e1000_write_phy_reg_mphy");
+
+ /* Check if mPHY is ready to read/write operations */
+ ready = e1000_is_mphy_ready(hw);
+ if (!ready)
+ return -E1000_ERR_PHY;
+
+ /* Check if mPHY access is disabled and enable it if so */
+ mphy_ctrl = E1000_READ_REG(hw, E1000_MPHY_ADDR_CTRL);
+ if (mphy_ctrl & E1000_MPHY_DIS_ACCESS) {
+ locked = true;
+ ready = e1000_is_mphy_ready(hw);
+ if (!ready)
+ return -E1000_ERR_PHY;
+ mphy_ctrl |= E1000_MPHY_ENA_ACCESS;
+ E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL, mphy_ctrl);
+ }
+
+ /* Set the address that we want to read */
+ ready = e1000_is_mphy_ready(hw);
+ if (!ready)
+ return -E1000_ERR_PHY;
+
+ /* We mask address, because we want to use only current lane */
+ if (line_override)
+ mphy_ctrl |= E1000_MPHY_ADDRESS_FNC_OVERRIDE;
+ else
+ mphy_ctrl &= ~E1000_MPHY_ADDRESS_FNC_OVERRIDE;
+ mphy_ctrl = (mphy_ctrl & ~E1000_MPHY_ADDRESS_MASK) |
+ (address & E1000_MPHY_ADDRESS_MASK);
+ E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL, mphy_ctrl);
+
+ /* Read data from the address */
+ ready = e1000_is_mphy_ready(hw);
+ if (!ready)
+ return -E1000_ERR_PHY;
+ E1000_WRITE_REG(hw, E1000_MPHY_DATA, data);
+
+ /* Disable access to mPHY if it was originally disabled */
+ if (locked)
+ ready = e1000_is_mphy_ready(hw);
+ if (!ready)
+ return -E1000_ERR_PHY;
+ E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL,
+ E1000_MPHY_DIS_ACCESS);
+
+ return E1000_SUCCESS;
+}
+
+/**
+ * e1000_is_mphy_ready - Check if mPHY control register is not busy
+ * @hw: pointer to the HW structure
+ *
+ * Returns mPHY control register status.
+ **/
+bool e1000_is_mphy_ready(struct e1000_hw *hw)
+{
+ u16 retry_count = 0;
+ u32 mphy_ctrl = 0;
+ bool ready = false;
+
+ while (retry_count < 2) {
+ mphy_ctrl = E1000_READ_REG(hw, E1000_MPHY_ADDR_CTRL);
+ if (mphy_ctrl & E1000_MPHY_BUSY) {
+ usec_delay(20);
+ retry_count++;
+ continue;
+ }
+ ready = true;
+ break;
+ }
+
+ if (!ready)
+ DEBUGOUT("ERROR READING mPHY control register, phy is busy.\n");
+
+ return ready;
+}
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.h
new file mode 100644
index 00000000..5387c5e7
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.h
@@ -0,0 +1,256 @@
+/*******************************************************************************
+
+ Intel(R) Gigabit Ethernet Linux driver
+ Copyright(c) 2007-2013 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _E1000_PHY_H_
+#define _E1000_PHY_H_
+
+void e1000_init_phy_ops_generic(struct e1000_hw *hw);
+s32 e1000_null_read_reg(struct e1000_hw *hw, u32 offset, u16 *data);
+void e1000_null_phy_generic(struct e1000_hw *hw);
+s32 e1000_null_lplu_state(struct e1000_hw *hw, bool active);
+s32 e1000_null_write_reg(struct e1000_hw *hw, u32 offset, u16 data);
+s32 e1000_null_set_page(struct e1000_hw *hw, u16 data);
+s32 e1000_read_i2c_byte_null(struct e1000_hw *hw, u8 byte_offset,
+ u8 dev_addr, u8 *data);
+s32 e1000_write_i2c_byte_null(struct e1000_hw *hw, u8 byte_offset,
+ u8 dev_addr, u8 data);
+s32 e1000_check_downshift_generic(struct e1000_hw *hw);
+s32 e1000_check_polarity_m88(struct e1000_hw *hw);
+s32 e1000_check_polarity_igp(struct e1000_hw *hw);
+s32 e1000_check_polarity_ife(struct e1000_hw *hw);
+s32 e1000_check_reset_block_generic(struct e1000_hw *hw);
+s32 e1000_copper_link_setup_igp(struct e1000_hw *hw);
+s32 e1000_copper_link_setup_m88(struct e1000_hw *hw);
+s32 e1000_copper_link_setup_m88_gen2(struct e1000_hw *hw);
+s32 e1000_phy_force_speed_duplex_igp(struct e1000_hw *hw);
+s32 e1000_phy_force_speed_duplex_m88(struct e1000_hw *hw);
+s32 e1000_phy_force_speed_duplex_ife(struct e1000_hw *hw);
+s32 e1000_get_cable_length_m88(struct e1000_hw *hw);
+s32 e1000_get_cable_length_m88_gen2(struct e1000_hw *hw);
+s32 e1000_get_cable_length_igp_2(struct e1000_hw *hw);
+s32 e1000_get_cfg_done_generic(struct e1000_hw *hw);
+s32 e1000_get_phy_id(struct e1000_hw *hw);
+s32 e1000_get_phy_info_igp(struct e1000_hw *hw);
+s32 e1000_get_phy_info_m88(struct e1000_hw *hw);
+s32 e1000_get_phy_info_ife(struct e1000_hw *hw);
+s32 e1000_phy_sw_reset_generic(struct e1000_hw *hw);
+void e1000_phy_force_speed_duplex_setup(struct e1000_hw *hw, u16 *phy_ctrl);
+s32 e1000_phy_hw_reset_generic(struct e1000_hw *hw);
+s32 e1000_phy_reset_dsp_generic(struct e1000_hw *hw);
+s32 e1000_read_kmrn_reg_generic(struct e1000_hw *hw, u32 offset, u16 *data);
+s32 e1000_read_kmrn_reg_locked(struct e1000_hw *hw, u32 offset, u16 *data);
+s32 e1000_set_page_igp(struct e1000_hw *hw, u16 page);
+s32 e1000_read_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 *data);
+s32 e1000_read_phy_reg_igp_locked(struct e1000_hw *hw, u32 offset, u16 *data);
+s32 e1000_read_phy_reg_m88(struct e1000_hw *hw, u32 offset, u16 *data);
+s32 e1000_set_d3_lplu_state_generic(struct e1000_hw *hw, bool active);
+s32 e1000_setup_copper_link_generic(struct e1000_hw *hw);
+s32 e1000_write_kmrn_reg_generic(struct e1000_hw *hw, u32 offset, u16 data);
+s32 e1000_write_kmrn_reg_locked(struct e1000_hw *hw, u32 offset, u16 data);
+s32 e1000_write_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 data);
+s32 e1000_write_phy_reg_igp_locked(struct e1000_hw *hw, u32 offset, u16 data);
+s32 e1000_write_phy_reg_m88(struct e1000_hw *hw, u32 offset, u16 data);
+s32 e1000_phy_has_link_generic(struct e1000_hw *hw, u32 iterations,
+ u32 usec_interval, bool *success);
+s32 e1000_phy_init_script_igp3(struct e1000_hw *hw);
+enum e1000_phy_type e1000_get_phy_type_from_id(u32 phy_id);
+s32 e1000_determine_phy_address(struct e1000_hw *hw);
+s32 e1000_enable_phy_wakeup_reg_access_bm(struct e1000_hw *hw, u16 *phy_reg);
+s32 e1000_disable_phy_wakeup_reg_access_bm(struct e1000_hw *hw, u16 *phy_reg);
+void e1000_power_up_phy_copper(struct e1000_hw *hw);
+void e1000_power_down_phy_copper(struct e1000_hw *hw);
+s32 e1000_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data);
+s32 e1000_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data);
+s32 e1000_read_phy_reg_i2c(struct e1000_hw *hw, u32 offset, u16 *data);
+s32 e1000_write_phy_reg_i2c(struct e1000_hw *hw, u32 offset, u16 data);
+s32 e1000_read_sfp_data_byte(struct e1000_hw *hw, u16 offset, u8 *data);
+s32 e1000_write_sfp_data_byte(struct e1000_hw *hw, u16 offset, u8 data);
+s32 e1000_copper_link_setup_82577(struct e1000_hw *hw);
+s32 e1000_check_polarity_82577(struct e1000_hw *hw);
+s32 e1000_get_phy_info_82577(struct e1000_hw *hw);
+s32 e1000_phy_force_speed_duplex_82577(struct e1000_hw *hw);
+s32 e1000_get_cable_length_82577(struct e1000_hw *hw);
+s32 e1000_write_phy_reg_gs40g(struct e1000_hw *hw, u32 offset, u16 data);
+s32 e1000_read_phy_reg_gs40g(struct e1000_hw *hw, u32 offset, u16 *data);
+s32 e1000_read_phy_reg_mphy(struct e1000_hw *hw, u32 address, u32 *data);
+s32 e1000_write_phy_reg_mphy(struct e1000_hw *hw, u32 address, u32 data,
+ bool line_override);
+bool e1000_is_mphy_ready(struct e1000_hw *hw);
+
+#define E1000_MAX_PHY_ADDR 8
+
+/* IGP01E1000 Specific Registers */
+#define IGP01E1000_PHY_PORT_CONFIG 0x10 /* Port Config */
+#define IGP01E1000_PHY_PORT_STATUS 0x11 /* Status */
+#define IGP01E1000_PHY_PORT_CTRL 0x12 /* Control */
+#define IGP01E1000_PHY_LINK_HEALTH 0x13 /* PHY Link Health */
+#define IGP02E1000_PHY_POWER_MGMT 0x19 /* Power Management */
+#define IGP01E1000_PHY_PAGE_SELECT 0x1F /* Page Select */
+#define BM_PHY_PAGE_SELECT 22 /* Page Select for BM */
+#define IGP_PAGE_SHIFT 5
+#define PHY_REG_MASK 0x1F
+
+/* GS40G - I210 PHY defines */
+#define GS40G_PAGE_SELECT 0x16
+#define GS40G_PAGE_SHIFT 16
+#define GS40G_OFFSET_MASK 0xFFFF
+#define GS40G_PAGE_2 0x20000
+#define GS40G_MAC_REG2 0x15
+#define GS40G_MAC_LB 0x4140
+#define GS40G_MAC_SPEED_1G 0X0006
+#define GS40G_COPPER_SPEC 0x0010
+#define GS40G_CS_POWER_DOWN 0x0002
+
+#define HV_INTC_FC_PAGE_START 768
+#define I82578_ADDR_REG 29
+#define I82577_ADDR_REG 16
+#define I82577_CFG_REG 22
+#define I82577_CFG_ASSERT_CRS_ON_TX (1 << 15)
+#define I82577_CFG_ENABLE_DOWNSHIFT (3 << 10) /* auto downshift */
+#define I82577_CTRL_REG 23
+
+/* 82577 specific PHY registers */
+#define I82577_PHY_CTRL_2 18
+#define I82577_PHY_LBK_CTRL 19
+#define I82577_PHY_STATUS_2 26
+#define I82577_PHY_DIAG_STATUS 31
+
+/* I82577 PHY Status 2 */
+#define I82577_PHY_STATUS2_REV_POLARITY 0x0400
+#define I82577_PHY_STATUS2_MDIX 0x0800
+#define I82577_PHY_STATUS2_SPEED_MASK 0x0300
+#define I82577_PHY_STATUS2_SPEED_1000MBPS 0x0200
+
+/* I82577 PHY Control 2 */
+#define I82577_PHY_CTRL2_MANUAL_MDIX 0x0200
+#define I82577_PHY_CTRL2_AUTO_MDI_MDIX 0x0400
+#define I82577_PHY_CTRL2_MDIX_CFG_MASK 0x0600
+
+/* I82577 PHY Diagnostics Status */
+#define I82577_DSTATUS_CABLE_LENGTH 0x03FC
+#define I82577_DSTATUS_CABLE_LENGTH_SHIFT 2
+
+/* 82580 PHY Power Management */
+#define E1000_82580_PHY_POWER_MGMT 0xE14
+#define E1000_82580_PM_SPD 0x0001 /* Smart Power Down */
+#define E1000_82580_PM_D0_LPLU 0x0002 /* For D0a states */
+#define E1000_82580_PM_D3_LPLU 0x0004 /* For all other states */
+#define E1000_82580_PM_GO_LINKD 0x0020 /* Go Link Disconnect */
+
+#define E1000_MPHY_DIS_ACCESS 0x80000000 /* disable_access bit */
+#define E1000_MPHY_ENA_ACCESS 0x40000000 /* enable_access bit */
+#define E1000_MPHY_BUSY 0x00010000 /* busy bit */
+#define E1000_MPHY_ADDRESS_FNC_OVERRIDE 0x20000000 /* fnc_override bit */
+#define E1000_MPHY_ADDRESS_MASK 0x0000FFFF /* address mask */
+
+#define IGP01E1000_PHY_PCS_INIT_REG 0x00B4
+#define IGP01E1000_PHY_POLARITY_MASK 0x0078
+
+#define IGP01E1000_PSCR_AUTO_MDIX 0x1000
+#define IGP01E1000_PSCR_FORCE_MDI_MDIX 0x2000 /* 0=MDI, 1=MDIX */
+
+#define IGP01E1000_PSCFR_SMART_SPEED 0x0080
+
+#define IGP02E1000_PM_SPD 0x0001 /* Smart Power Down */
+#define IGP02E1000_PM_D0_LPLU 0x0002 /* For D0a states */
+#define IGP02E1000_PM_D3_LPLU 0x0004 /* For all other states */
+
+#define IGP01E1000_PLHR_SS_DOWNGRADE 0x8000
+
+#define IGP01E1000_PSSR_POLARITY_REVERSED 0x0002
+#define IGP01E1000_PSSR_MDIX 0x0800
+#define IGP01E1000_PSSR_SPEED_MASK 0xC000
+#define IGP01E1000_PSSR_SPEED_1000MBPS 0xC000
+
+#define IGP02E1000_PHY_CHANNEL_NUM 4
+#define IGP02E1000_PHY_AGC_A 0x11B1
+#define IGP02E1000_PHY_AGC_B 0x12B1
+#define IGP02E1000_PHY_AGC_C 0x14B1
+#define IGP02E1000_PHY_AGC_D 0x18B1
+
+#define IGP02E1000_AGC_LENGTH_SHIFT 9 /* Course=15:13, Fine=12:9 */
+#define IGP02E1000_AGC_LENGTH_MASK 0x7F
+#define IGP02E1000_AGC_RANGE 15
+
+#define E1000_CABLE_LENGTH_UNDEFINED 0xFF
+
+#define E1000_KMRNCTRLSTA_OFFSET 0x001F0000
+#define E1000_KMRNCTRLSTA_OFFSET_SHIFT 16
+#define E1000_KMRNCTRLSTA_REN 0x00200000
+#define E1000_KMRNCTRLSTA_DIAG_OFFSET 0x3 /* Kumeran Diagnostic */
+#define E1000_KMRNCTRLSTA_TIMEOUTS 0x4 /* Kumeran Timeouts */
+#define E1000_KMRNCTRLSTA_INBAND_PARAM 0x9 /* Kumeran InBand Parameters */
+#define E1000_KMRNCTRLSTA_IBIST_DISABLE 0x0200 /* Kumeran IBIST Disable */
+#define E1000_KMRNCTRLSTA_DIAG_NELPBK 0x1000 /* Nearend Loopback mode */
+
+#define IFE_PHY_EXTENDED_STATUS_CONTROL 0x10
+#define IFE_PHY_SPECIAL_CONTROL 0x11 /* 100BaseTx PHY Special Ctrl */
+#define IFE_PHY_SPECIAL_CONTROL_LED 0x1B /* PHY Special and LED Ctrl */
+#define IFE_PHY_MDIX_CONTROL 0x1C /* MDI/MDI-X Control */
+
+/* IFE PHY Extended Status Control */
+#define IFE_PESC_POLARITY_REVERSED 0x0100
+
+/* IFE PHY Special Control */
+#define IFE_PSC_AUTO_POLARITY_DISABLE 0x0010
+#define IFE_PSC_FORCE_POLARITY 0x0020
+
+/* IFE PHY Special Control and LED Control */
+#define IFE_PSCL_PROBE_MODE 0x0020
+#define IFE_PSCL_PROBE_LEDS_OFF 0x0006 /* Force LEDs 0 and 2 off */
+#define IFE_PSCL_PROBE_LEDS_ON 0x0007 /* Force LEDs 0 and 2 on */
+
+/* IFE PHY MDIX Control */
+#define IFE_PMC_MDIX_STATUS 0x0020 /* 1=MDI-X, 0=MDI */
+#define IFE_PMC_FORCE_MDIX 0x0040 /* 1=force MDI-X, 0=force MDI */
+#define IFE_PMC_AUTO_MDIX 0x0080 /* 1=enable auto, 0=disable */
+
+/* SFP modules ID memory locations */
+#define E1000_SFF_IDENTIFIER_OFFSET 0x00
+#define E1000_SFF_IDENTIFIER_SFF 0x02
+#define E1000_SFF_IDENTIFIER_SFP 0x03
+
+#define E1000_SFF_ETH_FLAGS_OFFSET 0x06
+/* Flags for SFP modules compatible with ETH up to 1Gb */
+struct sfp_e1000_flags {
+ u8 e1000_base_sx:1;
+ u8 e1000_base_lx:1;
+ u8 e1000_base_cx:1;
+ u8 e1000_base_t:1;
+ u8 e100_base_lx:1;
+ u8 e100_base_fx:1;
+ u8 e10_base_bx10:1;
+ u8 e10_base_px:1;
+};
+
+/* Vendor OUIs: format of OUI is 0x[byte0][byte1][byte2][00] */
+#define E1000_SFF_VENDOR_OUI_TYCO 0x00407600
+#define E1000_SFF_VENDOR_OUI_FTL 0x00906500
+#define E1000_SFF_VENDOR_OUI_AVAGO 0x00176A00
+#define E1000_SFF_VENDOR_OUI_INTEL 0x001B2100
+
+#endif
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_regs.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_regs.h
new file mode 100644
index 00000000..0e083c54
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_regs.h
@@ -0,0 +1,646 @@
+/*******************************************************************************
+
+ Intel(R) Gigabit Ethernet Linux driver
+ Copyright(c) 2007-2013 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _E1000_REGS_H_
+#define _E1000_REGS_H_
+
+#define E1000_CTRL 0x00000 /* Device Control - RW */
+#define E1000_STATUS 0x00008 /* Device Status - RO */
+#define E1000_EECD 0x00010 /* EEPROM/Flash Control - RW */
+#define E1000_EERD 0x00014 /* EEPROM Read - RW */
+#define E1000_CTRL_EXT 0x00018 /* Extended Device Control - RW */
+#define E1000_FLA 0x0001C /* Flash Access - RW */
+#define E1000_MDIC 0x00020 /* MDI Control - RW */
+#define E1000_MDICNFG 0x00E04 /* MDI Config - RW */
+#define E1000_REGISTER_SET_SIZE 0x20000 /* CSR Size */
+#define E1000_EEPROM_INIT_CTRL_WORD_2 0x0F /* EEPROM Init Ctrl Word 2 */
+#define E1000_EEPROM_PCIE_CTRL_WORD_2 0x28 /* EEPROM PCIe Ctrl Word 2 */
+#define E1000_BARCTRL 0x5BBC /* BAR ctrl reg */
+#define E1000_BARCTRL_FLSIZE 0x0700 /* BAR ctrl Flsize */
+#define E1000_BARCTRL_CSRSIZE 0x2000 /* BAR ctrl CSR size */
+#define E1000_MPHY_ADDR_CTRL 0x0024 /* GbE MPHY Address Control */
+#define E1000_MPHY_DATA 0x0E10 /* GBE MPHY Data */
+#define E1000_MPHY_STAT 0x0E0C /* GBE MPHY Statistics */
+#define E1000_PPHY_CTRL 0x5b48 /* PCIe PHY Control */
+#define E1000_I350_BARCTRL 0x5BFC /* BAR ctrl reg */
+#define E1000_I350_DTXMXPKTSZ 0x355C /* Maximum sent packet size reg*/
+#define E1000_SCTL 0x00024 /* SerDes Control - RW */
+#define E1000_FCAL 0x00028 /* Flow Control Address Low - RW */
+#define E1000_FCAH 0x0002C /* Flow Control Address High -RW */
+#define E1000_FCT 0x00030 /* Flow Control Type - RW */
+#define E1000_CONNSW 0x00034 /* Copper/Fiber switch control - RW */
+#define E1000_VET 0x00038 /* VLAN Ether Type - RW */
+#define E1000_ICR 0x000C0 /* Interrupt Cause Read - R/clr */
+#define E1000_ITR 0x000C4 /* Interrupt Throttling Rate - RW */
+#define E1000_ICS 0x000C8 /* Interrupt Cause Set - WO */
+#define E1000_IMS 0x000D0 /* Interrupt Mask Set - RW */
+#define E1000_IMC 0x000D8 /* Interrupt Mask Clear - WO */
+#define E1000_IAM 0x000E0 /* Interrupt Acknowledge Auto Mask */
+#define E1000_RCTL 0x00100 /* Rx Control - RW */
+#define E1000_FCTTV 0x00170 /* Flow Control Transmit Timer Value - RW */
+#define E1000_TXCW 0x00178 /* Tx Configuration Word - RW */
+#define E1000_RXCW 0x00180 /* Rx Configuration Word - RO */
+#define E1000_EICR 0x01580 /* Ext. Interrupt Cause Read - R/clr */
+#define E1000_EITR(_n) (0x01680 + (0x4 * (_n)))
+#define E1000_EICS 0x01520 /* Ext. Interrupt Cause Set - W0 */
+#define E1000_EIMS 0x01524 /* Ext. Interrupt Mask Set/Read - RW */
+#define E1000_EIMC 0x01528 /* Ext. Interrupt Mask Clear - WO */
+#define E1000_EIAC 0x0152C /* Ext. Interrupt Auto Clear - RW */
+#define E1000_EIAM 0x01530 /* Ext. Interrupt Ack Auto Clear Mask - RW */
+#define E1000_GPIE 0x01514 /* General Purpose Interrupt Enable - RW */
+#define E1000_IVAR0 0x01700 /* Interrupt Vector Allocation (array) - RW */
+#define E1000_IVAR_MISC 0x01740 /* IVAR for "other" causes - RW */
+#define E1000_TCTL 0x00400 /* Tx Control - RW */
+#define E1000_TCTL_EXT 0x00404 /* Extended Tx Control - RW */
+#define E1000_TIPG 0x00410 /* Tx Inter-packet gap -RW */
+#define E1000_AIT 0x00458 /* Adaptive Interframe Spacing Throttle - RW */
+#define E1000_LEDCTL 0x00E00 /* LED Control - RW */
+#define E1000_LEDMUX 0x08130 /* LED MUX Control */
+#define E1000_EXTCNF_CTRL 0x00F00 /* Extended Configuration Control */
+#define E1000_EXTCNF_SIZE 0x00F08 /* Extended Configuration Size */
+#define E1000_PHY_CTRL 0x00F10 /* PHY Control Register in CSR */
+#define E1000_PBA 0x01000 /* Packet Buffer Allocation - RW */
+#define E1000_PBS 0x01008 /* Packet Buffer Size */
+#define E1000_EEMNGCTL 0x01010 /* MNG EEprom Control */
+#define E1000_EEARBC 0x01024 /* EEPROM Auto Read Bus Control */
+#define E1000_EEWR 0x0102C /* EEPROM Write Register - RW */
+#define E1000_FLOP 0x0103C /* FLASH Opcode Register */
+#define E1000_I2CCMD 0x01028 /* SFPI2C Command Register - RW */
+#define E1000_I2CPARAMS 0x0102C /* SFPI2C Parameters Register - RW */
+#define E1000_I2CBB_EN 0x00000100 /* I2C - Bit Bang Enable */
+#define E1000_I2C_CLK_OUT 0x00000200 /* I2C- Clock */
+#define E1000_I2C_DATA_OUT 0x00000400 /* I2C- Data Out */
+#define E1000_I2C_DATA_OE_N 0x00000800 /* I2C- Data Output Enable */
+#define E1000_I2C_DATA_IN 0x00001000 /* I2C- Data In */
+#define E1000_I2C_CLK_OE_N 0x00002000 /* I2C- Clock Output Enable */
+#define E1000_I2C_CLK_IN 0x00004000 /* I2C- Clock In */
+#define E1000_I2C_CLK_STRETCH_DIS 0x00008000 /* I2C- Dis Clk Stretching */
+#define E1000_WDSTP 0x01040 /* Watchdog Setup - RW */
+#define E1000_SWDSTS 0x01044 /* SW Device Status - RW */
+#define E1000_FRTIMER 0x01048 /* Free Running Timer - RW */
+#define E1000_TCPTIMER 0x0104C /* TCP Timer - RW */
+#define E1000_VPDDIAG 0x01060 /* VPD Diagnostic - RO */
+#define E1000_ICR_V2 0x01500 /* Intr Cause - new location - RC */
+#define E1000_ICS_V2 0x01504 /* Intr Cause Set - new location - WO */
+#define E1000_IMS_V2 0x01508 /* Intr Mask Set/Read - new location - RW */
+#define E1000_IMC_V2 0x0150C /* Intr Mask Clear - new location - WO */
+#define E1000_IAM_V2 0x01510 /* Intr Ack Auto Mask - new location - RW */
+#define E1000_ERT 0x02008 /* Early Rx Threshold - RW */
+#define E1000_FCRTL 0x02160 /* Flow Control Receive Threshold Low - RW */
+#define E1000_FCRTH 0x02168 /* Flow Control Receive Threshold High - RW */
+#define E1000_PSRCTL 0x02170 /* Packet Split Receive Control - RW */
+#define E1000_RDFH 0x02410 /* Rx Data FIFO Head - RW */
+#define E1000_RDFT 0x02418 /* Rx Data FIFO Tail - RW */
+#define E1000_RDFHS 0x02420 /* Rx Data FIFO Head Saved - RW */
+#define E1000_RDFTS 0x02428 /* Rx Data FIFO Tail Saved - RW */
+#define E1000_RDFPC 0x02430 /* Rx Data FIFO Packet Count - RW */
+#define E1000_PBRTH 0x02458 /* PB Rx Arbitration Threshold - RW */
+#define E1000_FCRTV 0x02460 /* Flow Control Refresh Timer Value - RW */
+/* Split and Replication Rx Control - RW */
+#define E1000_RDPUMB 0x025CC /* DMA Rx Descriptor uC Mailbox - RW */
+#define E1000_RDPUAD 0x025D0 /* DMA Rx Descriptor uC Addr Command - RW */
+#define E1000_RDPUWD 0x025D4 /* DMA Rx Descriptor uC Data Write - RW */
+#define E1000_RDPURD 0x025D8 /* DMA Rx Descriptor uC Data Read - RW */
+#define E1000_RDPUCTL 0x025DC /* DMA Rx Descriptor uC Control - RW */
+#define E1000_PBDIAG 0x02458 /* Packet Buffer Diagnostic - RW */
+#define E1000_RXPBS 0x02404 /* Rx Packet Buffer Size - RW */
+#define E1000_IRPBS 0x02404 /* Same as RXPBS, renamed for newer Si - RW */
+#define E1000_PBRWAC 0x024E8 /* Rx packet buffer wrap around counter - RO */
+#define E1000_RDTR 0x02820 /* Rx Delay Timer - RW */
+#define E1000_RADV 0x0282C /* Rx Interrupt Absolute Delay Timer - RW */
+#define E1000_EMIADD 0x10 /* Extended Memory Indirect Address */
+#define E1000_EMIDATA 0x11 /* Extended Memory Indirect Data */
+#define E1000_SRWR 0x12018 /* Shadow Ram Write Register - RW */
+#define E1000_I210_FLMNGCTL 0x12038
+#define E1000_I210_FLMNGDATA 0x1203C
+#define E1000_I210_FLMNGCNT 0x12040
+
+#define E1000_I210_FLSWCTL 0x12048
+#define E1000_I210_FLSWDATA 0x1204C
+#define E1000_I210_FLSWCNT 0x12050
+
+#define E1000_I210_FLA 0x1201C
+
+#define E1000_INVM_DATA_REG(_n) (0x12120 + 4*(_n))
+#define E1000_INVM_SIZE 64 /* Number of INVM Data Registers */
+
+/* QAV Tx mode control register */
+#define E1000_I210_TQAVCTRL 0x3570
+
+/* QAV Tx mode control register bitfields masks */
+/* QAV enable */
+#define E1000_TQAVCTRL_MODE (1 << 0)
+/* Fetching arbitration type */
+#define E1000_TQAVCTRL_FETCH_ARB (1 << 4)
+/* Fetching timer enable */
+#define E1000_TQAVCTRL_FETCH_TIMER_ENABLE (1 << 5)
+/* Launch arbitration type */
+#define E1000_TQAVCTRL_LAUNCH_ARB (1 << 8)
+/* Launch timer enable */
+#define E1000_TQAVCTRL_LAUNCH_TIMER_ENABLE (1 << 9)
+/* SP waits for SR enable */
+#define E1000_TQAVCTRL_SP_WAIT_SR (1 << 10)
+/* Fetching timer correction */
+#define E1000_TQAVCTRL_FETCH_TIMER_DELTA_OFFSET 16
+#define E1000_TQAVCTRL_FETCH_TIMER_DELTA \
+ (0xFFFF << E1000_TQAVCTRL_FETCH_TIMER_DELTA_OFFSET)
+
+/* High credit registers where _n can be 0 or 1. */
+#define E1000_I210_TQAVHC(_n) (0x300C + 0x40 * (_n))
+
+/* Queues fetch arbitration priority control register */
+#define E1000_I210_TQAVARBCTRL 0x3574
+/* Queues priority masks where _n and _p can be 0-3. */
+#define E1000_TQAVARBCTRL_QUEUE_PRI(_n, _p) ((_p) << (2 * _n))
+/* QAV Tx mode control registers where _n can be 0 or 1. */
+#define E1000_I210_TQAVCC(_n) (0x3004 + 0x40 * (_n))
+
+/* QAV Tx mode control register bitfields masks */
+#define E1000_TQAVCC_IDLE_SLOPE 0xFFFF /* Idle slope */
+#define E1000_TQAVCC_KEEP_CREDITS (1 << 30) /* Keep credits opt enable */
+#define E1000_TQAVCC_QUEUE_MODE (1 << 31) /* SP vs. SR Tx mode */
+
+/* Good transmitted packets counter registers */
+#define E1000_PQGPTC(_n) (0x010014 + (0x100 * (_n)))
+
+/* Queues packet buffer size masks where _n can be 0-3 and _s 0-63 [kB] */
+#define E1000_I210_TXPBS_SIZE(_n, _s) ((_s) << (6 * _n))
+
+#define E1000_MMDAC 13 /* MMD Access Control */
+#define E1000_MMDAAD 14 /* MMD Access Address/Data */
+
+/* Convenience macros
+ *
+ * Note: "_n" is the queue number of the register to be written to.
+ *
+ * Example usage:
+ * E1000_RDBAL_REG(current_rx_queue)
+ */
+#define E1000_RDBAL(_n) ((_n) < 4 ? (0x02800 + ((_n) * 0x100)) : \
+ (0x0C000 + ((_n) * 0x40)))
+#define E1000_RDBAH(_n) ((_n) < 4 ? (0x02804 + ((_n) * 0x100)) : \
+ (0x0C004 + ((_n) * 0x40)))
+#define E1000_RDLEN(_n) ((_n) < 4 ? (0x02808 + ((_n) * 0x100)) : \
+ (0x0C008 + ((_n) * 0x40)))
+#define E1000_SRRCTL(_n) ((_n) < 4 ? (0x0280C + ((_n) * 0x100)) : \
+ (0x0C00C + ((_n) * 0x40)))
+#define E1000_RDH(_n) ((_n) < 4 ? (0x02810 + ((_n) * 0x100)) : \
+ (0x0C010 + ((_n) * 0x40)))
+#define E1000_RXCTL(_n) ((_n) < 4 ? (0x02814 + ((_n) * 0x100)) : \
+ (0x0C014 + ((_n) * 0x40)))
+#define E1000_DCA_RXCTRL(_n) E1000_RXCTL(_n)
+#define E1000_RDT(_n) ((_n) < 4 ? (0x02818 + ((_n) * 0x100)) : \
+ (0x0C018 + ((_n) * 0x40)))
+#define E1000_RXDCTL(_n) ((_n) < 4 ? (0x02828 + ((_n) * 0x100)) : \
+ (0x0C028 + ((_n) * 0x40)))
+#define E1000_RQDPC(_n) ((_n) < 4 ? (0x02830 + ((_n) * 0x100)) : \
+ (0x0C030 + ((_n) * 0x40)))
+#define E1000_TDBAL(_n) ((_n) < 4 ? (0x03800 + ((_n) * 0x100)) : \
+ (0x0E000 + ((_n) * 0x40)))
+#define E1000_TDBAH(_n) ((_n) < 4 ? (0x03804 + ((_n) * 0x100)) : \
+ (0x0E004 + ((_n) * 0x40)))
+#define E1000_TDLEN(_n) ((_n) < 4 ? (0x03808 + ((_n) * 0x100)) : \
+ (0x0E008 + ((_n) * 0x40)))
+#define E1000_TDH(_n) ((_n) < 4 ? (0x03810 + ((_n) * 0x100)) : \
+ (0x0E010 + ((_n) * 0x40)))
+#define E1000_TXCTL(_n) ((_n) < 4 ? (0x03814 + ((_n) * 0x100)) : \
+ (0x0E014 + ((_n) * 0x40)))
+#define E1000_DCA_TXCTRL(_n) E1000_TXCTL(_n)
+#define E1000_TDT(_n) ((_n) < 4 ? (0x03818 + ((_n) * 0x100)) : \
+ (0x0E018 + ((_n) * 0x40)))
+#define E1000_TXDCTL(_n) ((_n) < 4 ? (0x03828 + ((_n) * 0x100)) : \
+ (0x0E028 + ((_n) * 0x40)))
+#define E1000_TDWBAL(_n) ((_n) < 4 ? (0x03838 + ((_n) * 0x100)) : \
+ (0x0E038 + ((_n) * 0x40)))
+#define E1000_TDWBAH(_n) ((_n) < 4 ? (0x0383C + ((_n) * 0x100)) : \
+ (0x0E03C + ((_n) * 0x40)))
+#define E1000_TARC(_n) (0x03840 + ((_n) * 0x100))
+#define E1000_RSRPD 0x02C00 /* Rx Small Packet Detect - RW */
+#define E1000_RAID 0x02C08 /* Receive Ack Interrupt Delay - RW */
+#define E1000_KABGTXD 0x03004 /* AFE Band Gap Transmit Ref Data */
+#define E1000_PSRTYPE(_i) (0x05480 + ((_i) * 4))
+#define E1000_RAL(_i) (((_i) <= 15) ? (0x05400 + ((_i) * 8)) : \
+ (0x054E0 + ((_i - 16) * 8)))
+#define E1000_RAH(_i) (((_i) <= 15) ? (0x05404 + ((_i) * 8)) : \
+ (0x054E4 + ((_i - 16) * 8)))
+#define E1000_SHRAL(_i) (0x05438 + ((_i) * 8))
+#define E1000_SHRAH(_i) (0x0543C + ((_i) * 8))
+#define E1000_IP4AT_REG(_i) (0x05840 + ((_i) * 8))
+#define E1000_IP6AT_REG(_i) (0x05880 + ((_i) * 4))
+#define E1000_WUPM_REG(_i) (0x05A00 + ((_i) * 4))
+#define E1000_FFMT_REG(_i) (0x09000 + ((_i) * 8))
+#define E1000_FFVT_REG(_i) (0x09800 + ((_i) * 8))
+#define E1000_FFLT_REG(_i) (0x05F00 + ((_i) * 8))
+#define E1000_PBSLAC 0x03100 /* Pkt Buffer Slave Access Control */
+#define E1000_PBSLAD(_n) (0x03110 + (0x4 * (_n))) /* Pkt Buffer DWORD */
+#define E1000_TXPBS 0x03404 /* Tx Packet Buffer Size - RW */
+/* Same as TXPBS, renamed for newer Si - RW */
+#define E1000_ITPBS 0x03404
+#define E1000_TDFH 0x03410 /* Tx Data FIFO Head - RW */
+#define E1000_TDFT 0x03418 /* Tx Data FIFO Tail - RW */
+#define E1000_TDFHS 0x03420 /* Tx Data FIFO Head Saved - RW */
+#define E1000_TDFTS 0x03428 /* Tx Data FIFO Tail Saved - RW */
+#define E1000_TDFPC 0x03430 /* Tx Data FIFO Packet Count - RW */
+#define E1000_TDPUMB 0x0357C /* DMA Tx Desc uC Mail Box - RW */
+#define E1000_TDPUAD 0x03580 /* DMA Tx Desc uC Addr Command - RW */
+#define E1000_TDPUWD 0x03584 /* DMA Tx Desc uC Data Write - RW */
+#define E1000_TDPURD 0x03588 /* DMA Tx Desc uC Data Read - RW */
+#define E1000_TDPUCTL 0x0358C /* DMA Tx Desc uC Control - RW */
+#define E1000_DTXCTL 0x03590 /* DMA Tx Control - RW */
+#define E1000_DTXTCPFLGL 0x0359C /* DMA Tx Control flag low - RW */
+#define E1000_DTXTCPFLGH 0x035A0 /* DMA Tx Control flag high - RW */
+/* DMA Tx Max Total Allow Size Reqs - RW */
+#define E1000_DTXMXSZRQ 0x03540
+#define E1000_TIDV 0x03820 /* Tx Interrupt Delay Value - RW */
+#define E1000_TADV 0x0382C /* Tx Interrupt Absolute Delay Val - RW */
+#define E1000_CRCERRS 0x04000 /* CRC Error Count - R/clr */
+#define E1000_ALGNERRC 0x04004 /* Alignment Error Count - R/clr */
+#define E1000_SYMERRS 0x04008 /* Symbol Error Count - R/clr */
+#define E1000_RXERRC 0x0400C /* Receive Error Count - R/clr */
+#define E1000_MPC 0x04010 /* Missed Packet Count - R/clr */
+#define E1000_SCC 0x04014 /* Single Collision Count - R/clr */
+#define E1000_ECOL 0x04018 /* Excessive Collision Count - R/clr */
+#define E1000_MCC 0x0401C /* Multiple Collision Count - R/clr */
+#define E1000_LATECOL 0x04020 /* Late Collision Count - R/clr */
+#define E1000_COLC 0x04028 /* Collision Count - R/clr */
+#define E1000_DC 0x04030 /* Defer Count - R/clr */
+#define E1000_TNCRS 0x04034 /* Tx-No CRS - R/clr */
+#define E1000_SEC 0x04038 /* Sequence Error Count - R/clr */
+#define E1000_CEXTERR 0x0403C /* Carrier Extension Error Count - R/clr */
+#define E1000_RLEC 0x04040 /* Receive Length Error Count - R/clr */
+#define E1000_XONRXC 0x04048 /* XON Rx Count - R/clr */
+#define E1000_XONTXC 0x0404C /* XON Tx Count - R/clr */
+#define E1000_XOFFRXC 0x04050 /* XOFF Rx Count - R/clr */
+#define E1000_XOFFTXC 0x04054 /* XOFF Tx Count - R/clr */
+#define E1000_FCRUC 0x04058 /* Flow Control Rx Unsupported Count- R/clr */
+#define E1000_PRC64 0x0405C /* Packets Rx (64 bytes) - R/clr */
+#define E1000_PRC127 0x04060 /* Packets Rx (65-127 bytes) - R/clr */
+#define E1000_PRC255 0x04064 /* Packets Rx (128-255 bytes) - R/clr */
+#define E1000_PRC511 0x04068 /* Packets Rx (255-511 bytes) - R/clr */
+#define E1000_PRC1023 0x0406C /* Packets Rx (512-1023 bytes) - R/clr */
+#define E1000_PRC1522 0x04070 /* Packets Rx (1024-1522 bytes) - R/clr */
+#define E1000_GPRC 0x04074 /* Good Packets Rx Count - R/clr */
+#define E1000_BPRC 0x04078 /* Broadcast Packets Rx Count - R/clr */
+#define E1000_MPRC 0x0407C /* Multicast Packets Rx Count - R/clr */
+#define E1000_GPTC 0x04080 /* Good Packets Tx Count - R/clr */
+#define E1000_GORCL 0x04088 /* Good Octets Rx Count Low - R/clr */
+#define E1000_GORCH 0x0408C /* Good Octets Rx Count High - R/clr */
+#define E1000_GOTCL 0x04090 /* Good Octets Tx Count Low - R/clr */
+#define E1000_GOTCH 0x04094 /* Good Octets Tx Count High - R/clr */
+#define E1000_RNBC 0x040A0 /* Rx No Buffers Count - R/clr */
+#define E1000_RUC 0x040A4 /* Rx Undersize Count - R/clr */
+#define E1000_RFC 0x040A8 /* Rx Fragment Count - R/clr */
+#define E1000_ROC 0x040AC /* Rx Oversize Count - R/clr */
+#define E1000_RJC 0x040B0 /* Rx Jabber Count - R/clr */
+#define E1000_MGTPRC 0x040B4 /* Management Packets Rx Count - R/clr */
+#define E1000_MGTPDC 0x040B8 /* Management Packets Dropped Count - R/clr */
+#define E1000_MGTPTC 0x040BC /* Management Packets Tx Count - R/clr */
+#define E1000_TORL 0x040C0 /* Total Octets Rx Low - R/clr */
+#define E1000_TORH 0x040C4 /* Total Octets Rx High - R/clr */
+#define E1000_TOTL 0x040C8 /* Total Octets Tx Low - R/clr */
+#define E1000_TOTH 0x040CC /* Total Octets Tx High - R/clr */
+#define E1000_TPR 0x040D0 /* Total Packets Rx - R/clr */
+#define E1000_TPT 0x040D4 /* Total Packets Tx - R/clr */
+#define E1000_PTC64 0x040D8 /* Packets Tx (64 bytes) - R/clr */
+#define E1000_PTC127 0x040DC /* Packets Tx (65-127 bytes) - R/clr */
+#define E1000_PTC255 0x040E0 /* Packets Tx (128-255 bytes) - R/clr */
+#define E1000_PTC511 0x040E4 /* Packets Tx (256-511 bytes) - R/clr */
+#define E1000_PTC1023 0x040E8 /* Packets Tx (512-1023 bytes) - R/clr */
+#define E1000_PTC1522 0x040EC /* Packets Tx (1024-1522 Bytes) - R/clr */
+#define E1000_MPTC 0x040F0 /* Multicast Packets Tx Count - R/clr */
+#define E1000_BPTC 0x040F4 /* Broadcast Packets Tx Count - R/clr */
+#define E1000_TSCTC 0x040F8 /* TCP Segmentation Context Tx - R/clr */
+#define E1000_TSCTFC 0x040FC /* TCP Segmentation Context Tx Fail - R/clr */
+#define E1000_IAC 0x04100 /* Interrupt Assertion Count */
+#define E1000_ICRXPTC 0x04104 /* Interrupt Cause Rx Pkt Timer Expire Count */
+#define E1000_ICRXATC 0x04108 /* Interrupt Cause Rx Abs Timer Expire Count */
+#define E1000_ICTXPTC 0x0410C /* Interrupt Cause Tx Pkt Timer Expire Count */
+#define E1000_ICTXATC 0x04110 /* Interrupt Cause Tx Abs Timer Expire Count */
+#define E1000_ICTXQEC 0x04118 /* Interrupt Cause Tx Queue Empty Count */
+#define E1000_ICTXQMTC 0x0411C /* Interrupt Cause Tx Queue Min Thresh Count */
+#define E1000_ICRXDMTC 0x04120 /* Interrupt Cause Rx Desc Min Thresh Count */
+#define E1000_ICRXOC 0x04124 /* Interrupt Cause Receiver Overrun Count */
+
+/* Virtualization statistical counters */
+#define E1000_PFVFGPRC(_n) (0x010010 + (0x100 * (_n)))
+#define E1000_PFVFGPTC(_n) (0x010014 + (0x100 * (_n)))
+#define E1000_PFVFGORC(_n) (0x010018 + (0x100 * (_n)))
+#define E1000_PFVFGOTC(_n) (0x010034 + (0x100 * (_n)))
+#define E1000_PFVFMPRC(_n) (0x010038 + (0x100 * (_n)))
+#define E1000_PFVFGPRLBC(_n) (0x010040 + (0x100 * (_n)))
+#define E1000_PFVFGPTLBC(_n) (0x010044 + (0x100 * (_n)))
+#define E1000_PFVFGORLBC(_n) (0x010048 + (0x100 * (_n)))
+#define E1000_PFVFGOTLBC(_n) (0x010050 + (0x100 * (_n)))
+
+/* LinkSec */
+#define E1000_LSECTXUT 0x04300 /* Tx Untagged Pkt Cnt */
+#define E1000_LSECTXPKTE 0x04304 /* Encrypted Tx Pkts Cnt */
+#define E1000_LSECTXPKTP 0x04308 /* Protected Tx Pkt Cnt */
+#define E1000_LSECTXOCTE 0x0430C /* Encrypted Tx Octets Cnt */
+#define E1000_LSECTXOCTP 0x04310 /* Protected Tx Octets Cnt */
+#define E1000_LSECRXUT 0x04314 /* Untagged non-Strict Rx Pkt Cnt */
+#define E1000_LSECRXOCTD 0x0431C /* Rx Octets Decrypted Count */
+#define E1000_LSECRXOCTV 0x04320 /* Rx Octets Validated */
+#define E1000_LSECRXBAD 0x04324 /* Rx Bad Tag */
+#define E1000_LSECRXNOSCI 0x04328 /* Rx Packet No SCI Count */
+#define E1000_LSECRXUNSCI 0x0432C /* Rx Packet Unknown SCI Count */
+#define E1000_LSECRXUNCH 0x04330 /* Rx Unchecked Packets Count */
+#define E1000_LSECRXDELAY 0x04340 /* Rx Delayed Packet Count */
+#define E1000_LSECRXLATE 0x04350 /* Rx Late Packets Count */
+#define E1000_LSECRXOK(_n) (0x04360 + (0x04 * (_n))) /* Rx Pkt OK Cnt */
+#define E1000_LSECRXINV(_n) (0x04380 + (0x04 * (_n))) /* Rx Invalid Cnt */
+#define E1000_LSECRXNV(_n) (0x043A0 + (0x04 * (_n))) /* Rx Not Valid Cnt */
+#define E1000_LSECRXUNSA 0x043C0 /* Rx Unused SA Count */
+#define E1000_LSECRXNUSA 0x043D0 /* Rx Not Using SA Count */
+#define E1000_LSECTXCAP 0x0B000 /* Tx Capabilities Register - RO */
+#define E1000_LSECRXCAP 0x0B300 /* Rx Capabilities Register - RO */
+#define E1000_LSECTXCTRL 0x0B004 /* Tx Control - RW */
+#define E1000_LSECRXCTRL 0x0B304 /* Rx Control - RW */
+#define E1000_LSECTXSCL 0x0B008 /* Tx SCI Low - RW */
+#define E1000_LSECTXSCH 0x0B00C /* Tx SCI High - RW */
+#define E1000_LSECTXSA 0x0B010 /* Tx SA0 - RW */
+#define E1000_LSECTXPN0 0x0B018 /* Tx SA PN 0 - RW */
+#define E1000_LSECTXPN1 0x0B01C /* Tx SA PN 1 - RW */
+#define E1000_LSECRXSCL 0x0B3D0 /* Rx SCI Low - RW */
+#define E1000_LSECRXSCH 0x0B3E0 /* Rx SCI High - RW */
+/* LinkSec Tx 128-bit Key 0 - WO */
+#define E1000_LSECTXKEY0(_n) (0x0B020 + (0x04 * (_n)))
+/* LinkSec Tx 128-bit Key 1 - WO */
+#define E1000_LSECTXKEY1(_n) (0x0B030 + (0x04 * (_n)))
+#define E1000_LSECRXSA(_n) (0x0B310 + (0x04 * (_n))) /* Rx SAs - RW */
+#define E1000_LSECRXPN(_n) (0x0B330 + (0x04 * (_n))) /* Rx SAs - RW */
+/* LinkSec Rx Keys - where _n is the SA no. and _m the 4 dwords of the 128 bit
+ * key - RW.
+ */
+#define E1000_LSECRXKEY(_n, _m) (0x0B350 + (0x10 * (_n)) + (0x04 * (_m)))
+
+#define E1000_SSVPC 0x041A0 /* Switch Security Violation Pkt Cnt */
+#define E1000_IPSCTRL 0xB430 /* IpSec Control Register */
+#define E1000_IPSRXCMD 0x0B408 /* IPSec Rx Command Register - RW */
+#define E1000_IPSRXIDX 0x0B400 /* IPSec Rx Index - RW */
+/* IPSec Rx IPv4/v6 Address - RW */
+#define E1000_IPSRXIPADDR(_n) (0x0B420 + (0x04 * (_n)))
+/* IPSec Rx 128-bit Key - RW */
+#define E1000_IPSRXKEY(_n) (0x0B410 + (0x04 * (_n)))
+#define E1000_IPSRXSALT 0x0B404 /* IPSec Rx Salt - RW */
+#define E1000_IPSRXSPI 0x0B40C /* IPSec Rx SPI - RW */
+/* IPSec Tx 128-bit Key - RW */
+#define E1000_IPSTXKEY(_n) (0x0B460 + (0x04 * (_n)))
+#define E1000_IPSTXSALT 0x0B454 /* IPSec Tx Salt - RW */
+#define E1000_IPSTXIDX 0x0B450 /* IPSec Tx SA IDX - RW */
+#define E1000_PCS_CFG0 0x04200 /* PCS Configuration 0 - RW */
+#define E1000_PCS_LCTL 0x04208 /* PCS Link Control - RW */
+#define E1000_PCS_LSTAT 0x0420C /* PCS Link Status - RO */
+#define E1000_CBTMPC 0x0402C /* Circuit Breaker Tx Packet Count */
+#define E1000_HTDPMC 0x0403C /* Host Transmit Discarded Packets */
+#define E1000_CBRDPC 0x04044 /* Circuit Breaker Rx Dropped Count */
+#define E1000_CBRMPC 0x040FC /* Circuit Breaker Rx Packet Count */
+#define E1000_RPTHC 0x04104 /* Rx Packets To Host */
+#define E1000_HGPTC 0x04118 /* Host Good Packets Tx Count */
+#define E1000_HTCBDPC 0x04124 /* Host Tx Circuit Breaker Dropped Count */
+#define E1000_HGORCL 0x04128 /* Host Good Octets Received Count Low */
+#define E1000_HGORCH 0x0412C /* Host Good Octets Received Count High */
+#define E1000_HGOTCL 0x04130 /* Host Good Octets Transmit Count Low */
+#define E1000_HGOTCH 0x04134 /* Host Good Octets Transmit Count High */
+#define E1000_LENERRS 0x04138 /* Length Errors Count */
+#define E1000_SCVPC 0x04228 /* SerDes/SGMII Code Violation Pkt Count */
+#define E1000_HRMPC 0x0A018 /* Header Redirection Missed Packet Count */
+#define E1000_PCS_ANADV 0x04218 /* AN advertisement - RW */
+#define E1000_PCS_LPAB 0x0421C /* Link Partner Ability - RW */
+#define E1000_PCS_NPTX 0x04220 /* AN Next Page Transmit - RW */
+#define E1000_PCS_LPABNP 0x04224 /* Link Partner Ability Next Pg - RW */
+#define E1000_RXCSUM 0x05000 /* Rx Checksum Control - RW */
+#define E1000_RLPML 0x05004 /* Rx Long Packet Max Length */
+#define E1000_RFCTL 0x05008 /* Receive Filter Control*/
+#define E1000_MTA 0x05200 /* Multicast Table Array - RW Array */
+#define E1000_RA 0x05400 /* Receive Address - RW Array */
+#define E1000_RA2 0x054E0 /* 2nd half of Rx address array - RW Array */
+#define E1000_VFTA 0x05600 /* VLAN Filter Table Array - RW Array */
+#define E1000_VT_CTL 0x0581C /* VMDq Control - RW */
+#define E1000_CIAA 0x05B88 /* Config Indirect Access Address - RW */
+#define E1000_CIAD 0x05B8C /* Config Indirect Access Data - RW */
+#define E1000_VFQA0 0x0B000 /* VLAN Filter Queue Array 0 - RW Array */
+#define E1000_VFQA1 0x0B200 /* VLAN Filter Queue Array 1 - RW Array */
+#define E1000_WUC 0x05800 /* Wakeup Control - RW */
+#define E1000_WUFC 0x05808 /* Wakeup Filter Control - RW */
+#define E1000_WUS 0x05810 /* Wakeup Status - RO */
+#define E1000_MANC 0x05820 /* Management Control - RW */
+#define E1000_IPAV 0x05838 /* IP Address Valid - RW */
+#define E1000_IP4AT 0x05840 /* IPv4 Address Table - RW Array */
+#define E1000_IP6AT 0x05880 /* IPv6 Address Table - RW Array */
+#define E1000_WUPL 0x05900 /* Wakeup Packet Length - RW */
+#define E1000_WUPM 0x05A00 /* Wakeup Packet Memory - RO A */
+#define E1000_PBACL 0x05B68 /* MSIx PBA Clear - Read/Write 1's to clear */
+#define E1000_FFLT 0x05F00 /* Flexible Filter Length Table - RW Array */
+#define E1000_HOST_IF 0x08800 /* Host Interface */
+#define E1000_FFMT 0x09000 /* Flexible Filter Mask Table - RW Array */
+#define E1000_FFVT 0x09800 /* Flexible Filter Value Table - RW Array */
+#define E1000_HIBBA 0x8F40 /* Host Interface Buffer Base Address */
+/* Flexible Host Filter Table */
+#define E1000_FHFT(_n) (0x09000 + ((_n) * 0x100))
+/* Ext Flexible Host Filter Table */
+#define E1000_FHFT_EXT(_n) (0x09A00 + ((_n) * 0x100))
+
+
+#define E1000_KMRNCTRLSTA 0x00034 /* MAC-PHY interface - RW */
+#define E1000_MANC2H 0x05860 /* Management Control To Host - RW */
+/* Management Decision Filters */
+#define E1000_MDEF(_n) (0x05890 + (4 * (_n)))
+#define E1000_SW_FW_SYNC 0x05B5C /* SW-FW Synchronization - RW */
+#define E1000_CCMCTL 0x05B48 /* CCM Control Register */
+#define E1000_GIOCTL 0x05B44 /* GIO Analog Control Register */
+#define E1000_SCCTL 0x05B4C /* PCIc PLL Configuration Register */
+#define E1000_GCR 0x05B00 /* PCI-Ex Control */
+#define E1000_GCR2 0x05B64 /* PCI-Ex Control #2 */
+#define E1000_GSCL_1 0x05B10 /* PCI-Ex Statistic Control #1 */
+#define E1000_GSCL_2 0x05B14 /* PCI-Ex Statistic Control #2 */
+#define E1000_GSCL_3 0x05B18 /* PCI-Ex Statistic Control #3 */
+#define E1000_GSCL_4 0x05B1C /* PCI-Ex Statistic Control #4 */
+#define E1000_FACTPS 0x05B30 /* Function Active and Power State to MNG */
+#define E1000_SWSM 0x05B50 /* SW Semaphore */
+#define E1000_FWSM 0x05B54 /* FW Semaphore */
+/* Driver-only SW semaphore (not used by BOOT agents) */
+#define E1000_SWSM2 0x05B58
+#define E1000_DCA_ID 0x05B70 /* DCA Requester ID Information - RO */
+#define E1000_DCA_CTRL 0x05B74 /* DCA Control - RW */
+#define E1000_UFUSE 0x05B78 /* UFUSE - RO */
+#define E1000_FFLT_DBG 0x05F04 /* Debug Register */
+#define E1000_HICR 0x08F00 /* Host Interface Control */
+#define E1000_FWSTS 0x08F0C /* FW Status */
+
+/* RSS registers */
+#define E1000_CPUVEC 0x02C10 /* CPU Vector Register - RW */
+#define E1000_MRQC 0x05818 /* Multiple Receive Control - RW */
+#define E1000_IMIR(_i) (0x05A80 + ((_i) * 4)) /* Immediate Interrupt */
+#define E1000_IMIREXT(_i) (0x05AA0 + ((_i) * 4)) /* Immediate INTR Ext*/
+#define E1000_IMIRVP 0x05AC0 /* Immediate INT Rx VLAN Priority -RW */
+#define E1000_MSIXBM(_i) (0x01600 + ((_i) * 4)) /* MSI-X Alloc Reg -RW */
+#define E1000_RETA(_i) (0x05C00 + ((_i) * 4)) /* Redirection Table - RW */
+#define E1000_RSSRK(_i) (0x05C80 + ((_i) * 4)) /* RSS Random Key - RW */
+#define E1000_RSSIM 0x05864 /* RSS Interrupt Mask */
+#define E1000_RSSIR 0x05868 /* RSS Interrupt Request */
+/* VT Registers */
+#define E1000_SWPBS 0x03004 /* Switch Packet Buffer Size - RW */
+#define E1000_MBVFICR 0x00C80 /* Mailbox VF Cause - RWC */
+#define E1000_MBVFIMR 0x00C84 /* Mailbox VF int Mask - RW */
+#define E1000_VFLRE 0x00C88 /* VF Register Events - RWC */
+#define E1000_VFRE 0x00C8C /* VF Receive Enables */
+#define E1000_VFTE 0x00C90 /* VF Transmit Enables */
+#define E1000_QDE 0x02408 /* Queue Drop Enable - RW */
+#define E1000_DTXSWC 0x03500 /* DMA Tx Switch Control - RW */
+#define E1000_WVBR 0x03554 /* VM Wrong Behavior - RWS */
+#define E1000_RPLOLR 0x05AF0 /* Replication Offload - RW */
+#define E1000_UTA 0x0A000 /* Unicast Table Array - RW */
+#define E1000_IOVTCL 0x05BBC /* IOV Control Register */
+#define E1000_VMRCTL 0X05D80 /* Virtual Mirror Rule Control */
+#define E1000_VMRVLAN 0x05D90 /* Virtual Mirror Rule VLAN */
+#define E1000_VMRVM 0x05DA0 /* Virtual Mirror Rule VM */
+#define E1000_MDFB 0x03558 /* Malicious Driver free block */
+#define E1000_LVMMC 0x03548 /* Last VM Misbehavior cause */
+#define E1000_TXSWC 0x05ACC /* Tx Switch Control */
+#define E1000_SCCRL 0x05DB0 /* Storm Control Control */
+#define E1000_BSCTRH 0x05DB8 /* Broadcast Storm Control Threshold */
+#define E1000_MSCTRH 0x05DBC /* Multicast Storm Control Threshold */
+/* These act per VF so an array friendly macro is used */
+#define E1000_V2PMAILBOX(_n) (0x00C40 + (4 * (_n)))
+#define E1000_P2VMAILBOX(_n) (0x00C00 + (4 * (_n)))
+#define E1000_VMBMEM(_n) (0x00800 + (64 * (_n)))
+#define E1000_VFVMBMEM(_n) (0x00800 + (_n))
+#define E1000_VMOLR(_n) (0x05AD0 + (4 * (_n)))
+/* VLAN Virtual Machine Filter - RW */
+#define E1000_VLVF(_n) (0x05D00 + (4 * (_n)))
+#define E1000_VMVIR(_n) (0x03700 + (4 * (_n)))
+#define E1000_DVMOLR(_n) (0x0C038 + (0x40 * (_n))) /* DMA VM offload */
+#define E1000_VTCTRL(_n) (0x10000 + (0x100 * (_n))) /* VT Control */
+#define E1000_TSYNCRXCTL 0x0B620 /* Rx Time Sync Control register - RW */
+#define E1000_TSYNCTXCTL 0x0B614 /* Tx Time Sync Control register - RW */
+#define E1000_TSYNCRXCFG 0x05F50 /* Time Sync Rx Configuration - RW */
+#define E1000_RXSTMPL 0x0B624 /* Rx timestamp Low - RO */
+#define E1000_RXSTMPH 0x0B628 /* Rx timestamp High - RO */
+#define E1000_RXSATRL 0x0B62C /* Rx timestamp attribute low - RO */
+#define E1000_RXSATRH 0x0B630 /* Rx timestamp attribute high - RO */
+#define E1000_TXSTMPL 0x0B618 /* Tx timestamp value Low - RO */
+#define E1000_TXSTMPH 0x0B61C /* Tx timestamp value High - RO */
+#define E1000_SYSTIML 0x0B600 /* System time register Low - RO */
+#define E1000_SYSTIMH 0x0B604 /* System time register High - RO */
+#define E1000_TIMINCA 0x0B608 /* Increment attributes register - RW */
+#define E1000_TIMADJL 0x0B60C /* Time sync time adjustment offset Low - RW */
+#define E1000_TIMADJH 0x0B610 /* Time sync time adjustment offset High - RW */
+#define E1000_TSAUXC 0x0B640 /* Timesync Auxiliary Control register */
+#define E1000_SYSTIMR 0x0B6F8 /* System time register Residue */
+#define E1000_TSICR 0x0B66C /* Interrupt Cause Register */
+#define E1000_TSIM 0x0B674 /* Interrupt Mask Register */
+
+/* Filtering Registers */
+#define E1000_SAQF(_n) (0x05980 + (4 * (_n))) /* Source Address Queue Fltr */
+#define E1000_DAQF(_n) (0x059A0 + (4 * (_n))) /* Dest Address Queue Fltr */
+#define E1000_SPQF(_n) (0x059C0 + (4 * (_n))) /* Source Port Queue Fltr */
+#define E1000_FTQF(_n) (0x059E0 + (4 * (_n))) /* 5-tuple Queue Fltr */
+#define E1000_TTQF(_n) (0x059E0 + (4 * (_n))) /* 2-tuple Queue Fltr */
+#define E1000_SYNQF(_n) (0x055FC + (4 * (_n))) /* SYN Packet Queue Fltr */
+#define E1000_ETQF(_n) (0x05CB0 + (4 * (_n))) /* EType Queue Fltr */
+
+#define E1000_RTTDCS 0x3600 /* Reedtown Tx Desc plane control and status */
+#define E1000_RTTPCS 0x3474 /* Reedtown Tx Packet Plane control and status */
+#define E1000_RTRPCS 0x2474 /* Rx packet plane control and status */
+#define E1000_RTRUP2TC 0x05AC4 /* Rx User Priority to Traffic Class */
+#define E1000_RTTUP2TC 0x0418 /* Transmit User Priority to Traffic Class */
+/* Tx Desc plane TC Rate-scheduler config */
+#define E1000_RTTDTCRC(_n) (0x3610 + ((_n) * 4))
+/* Tx Packet plane TC Rate-Scheduler Config */
+#define E1000_RTTPTCRC(_n) (0x3480 + ((_n) * 4))
+/* Rx Packet plane TC Rate-Scheduler Config */
+#define E1000_RTRPTCRC(_n) (0x2480 + ((_n) * 4))
+/* Tx Desc Plane TC Rate-Scheduler Status */
+#define E1000_RTTDTCRS(_n) (0x3630 + ((_n) * 4))
+/* Tx Desc Plane TC Rate-Scheduler MMW */
+#define E1000_RTTDTCRM(_n) (0x3650 + ((_n) * 4))
+/* Tx Packet plane TC Rate-Scheduler Status */
+#define E1000_RTTPTCRS(_n) (0x34A0 + ((_n) * 4))
+/* Tx Packet plane TC Rate-scheduler MMW */
+#define E1000_RTTPTCRM(_n) (0x34C0 + ((_n) * 4))
+/* Rx Packet plane TC Rate-Scheduler Status */
+#define E1000_RTRPTCRS(_n) (0x24A0 + ((_n) * 4))
+/* Rx Packet plane TC Rate-Scheduler MMW */
+#define E1000_RTRPTCRM(_n) (0x24C0 + ((_n) * 4))
+/* Tx Desc plane VM Rate-Scheduler MMW*/
+#define E1000_RTTDVMRM(_n) (0x3670 + ((_n) * 4))
+/* Tx BCN Rate-Scheduler MMW */
+#define E1000_RTTBCNRM(_n) (0x3690 + ((_n) * 4))
+#define E1000_RTTDQSEL 0x3604 /* Tx Desc Plane Queue Select */
+#define E1000_RTTDVMRC 0x3608 /* Tx Desc Plane VM Rate-Scheduler Config */
+#define E1000_RTTDVMRS 0x360C /* Tx Desc Plane VM Rate-Scheduler Status */
+#define E1000_RTTBCNRC 0x36B0 /* Tx BCN Rate-Scheduler Config */
+#define E1000_RTTBCNRS 0x36B4 /* Tx BCN Rate-Scheduler Status */
+#define E1000_RTTBCNCR 0xB200 /* Tx BCN Control Register */
+#define E1000_RTTBCNTG 0x35A4 /* Tx BCN Tagging */
+#define E1000_RTTBCNCP 0xB208 /* Tx BCN Congestion point */
+#define E1000_RTRBCNCR 0xB20C /* Rx BCN Control Register */
+#define E1000_RTTBCNRD 0x36B8 /* Tx BCN Rate Drift */
+#define E1000_PFCTOP 0x1080 /* Priority Flow Control Type and Opcode */
+#define E1000_RTTBCNIDX 0xB204 /* Tx BCN Congestion Point */
+#define E1000_RTTBCNACH 0x0B214 /* Tx BCN Control High */
+#define E1000_RTTBCNACL 0x0B210 /* Tx BCN Control Low */
+
+/* DMA Coalescing registers */
+#define E1000_DMACR 0x02508 /* Control Register */
+#define E1000_DMCTXTH 0x03550 /* Transmit Threshold */
+#define E1000_DMCTLX 0x02514 /* Time to Lx Request */
+#define E1000_DMCRTRH 0x05DD0 /* Receive Packet Rate Threshold */
+#define E1000_DMCCNT 0x05DD4 /* Current Rx Count */
+#define E1000_FCRTC 0x02170 /* Flow Control Rx high watermark */
+#define E1000_PCIEMISC 0x05BB8 /* PCIE misc config register */
+
+/* PCIe Parity Status Register */
+#define E1000_PCIEERRSTS 0x05BA8
+
+#define E1000_PROXYS 0x5F64 /* Proxying Status */
+#define E1000_PROXYFC 0x5F60 /* Proxying Filter Control */
+/* Thermal sensor configuration and status registers */
+#define E1000_THMJT 0x08100 /* Junction Temperature */
+#define E1000_THLOWTC 0x08104 /* Low Threshold Control */
+#define E1000_THMIDTC 0x08108 /* Mid Threshold Control */
+#define E1000_THHIGHTC 0x0810C /* High Threshold Control */
+#define E1000_THSTAT 0x08110 /* Thermal Sensor Status */
+
+/* Energy Efficient Ethernet "EEE" registers */
+#define E1000_IPCNFG 0x0E38 /* Internal PHY Configuration */
+#define E1000_LTRC 0x01A0 /* Latency Tolerance Reporting Control */
+#define E1000_EEER 0x0E30 /* Energy Efficient Ethernet "EEE"*/
+#define E1000_EEE_SU 0x0E34 /* EEE Setup */
+#define E1000_TLPIC 0x4148 /* EEE Tx LPI Count - TLPIC */
+#define E1000_RLPIC 0x414C /* EEE Rx LPI Count - RLPIC */
+
+/* OS2BMC Registers */
+#define E1000_B2OSPC 0x08FE0 /* BMC2OS packets sent by BMC */
+#define E1000_B2OGPRC 0x04158 /* BMC2OS packets received by host */
+#define E1000_O2BGPTC 0x08FE4 /* OS2BMC packets received by BMC */
+#define E1000_O2BSPC 0x0415C /* OS2BMC packets transmitted by host */
+
+
+
+#endif
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb.h
new file mode 100644
index 00000000..e5554ca3
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb.h
@@ -0,0 +1,859 @@
+/*******************************************************************************
+
+ Intel(R) Gigabit Ethernet Linux driver
+ Copyright(c) 2007-2013 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+/* Linux PRO/1000 Ethernet Driver main header file */
+
+#ifndef _IGB_H_
+#define _IGB_H_
+
+#include <linux/kobject.h>
+
+#ifndef IGB_NO_LRO
+#include <net/tcp.h>
+#endif
+
+#undef HAVE_HW_TIME_STAMP
+#ifdef HAVE_HW_TIME_STAMP
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/vmalloc.h>
+
+#endif
+#ifdef SIOCETHTOOL
+#include <linux/ethtool.h>
+#endif
+
+struct igb_adapter;
+
+#if defined(CONFIG_DCA) || defined(CONFIG_DCA_MODULE)
+//#define IGB_DCA
+#endif
+#ifdef IGB_DCA
+#include <linux/dca.h>
+#endif
+
+#include "kcompat.h"
+
+#ifdef HAVE_SCTP
+#include <linux/sctp.h>
+#endif
+
+#include "e1000_api.h"
+#include "e1000_82575.h"
+#include "e1000_manage.h"
+#include "e1000_mbx.h"
+
+#define IGB_ERR(args...) printk(KERN_ERR "igb: " args)
+
+#define PFX "igb: "
+#define DPRINTK(nlevel, klevel, fmt, args...) \
+ (void)((NETIF_MSG_##nlevel & adapter->msg_enable) && \
+ printk(KERN_##klevel PFX "%s: %s: " fmt, adapter->netdev->name, \
+ __FUNCTION__ , ## args))
+
+#ifdef HAVE_PTP_1588_CLOCK
+#include <linux/clocksource.h>
+#include <linux/net_tstamp.h>
+#include <linux/ptp_clock_kernel.h>
+#endif /* HAVE_PTP_1588_CLOCK */
+
+#ifdef HAVE_I2C_SUPPORT
+#include <linux/i2c.h>
+#include <linux/i2c-algo-bit.h>
+#endif /* HAVE_I2C_SUPPORT */
+
+/* Interrupt defines */
+#define IGB_START_ITR 648 /* ~6000 ints/sec */
+#define IGB_4K_ITR 980
+#define IGB_20K_ITR 196
+#define IGB_70K_ITR 56
+
+/* Interrupt modes, as used by the IntMode parameter */
+#define IGB_INT_MODE_LEGACY 0
+#define IGB_INT_MODE_MSI 1
+#define IGB_INT_MODE_MSIX 2
+
+/* TX/RX descriptor defines */
+#define IGB_DEFAULT_TXD 256
+#define IGB_DEFAULT_TX_WORK 128
+#define IGB_MIN_TXD 80
+#define IGB_MAX_TXD 4096
+
+#define IGB_DEFAULT_RXD 256
+#define IGB_MIN_RXD 80
+#define IGB_MAX_RXD 4096
+
+#define IGB_MIN_ITR_USECS 10 /* 100k irq/sec */
+#define IGB_MAX_ITR_USECS 8191 /* 120 irq/sec */
+
+#define NON_Q_VECTORS 1
+#define MAX_Q_VECTORS 10
+
+/* Transmit and receive queues */
+#define IGB_MAX_RX_QUEUES 16
+#define IGB_MAX_TX_QUEUES 16
+
+#define IGB_MAX_VF_MC_ENTRIES 30
+#define IGB_MAX_VF_FUNCTIONS 8
+#define IGB_82576_VF_DEV_ID 0x10CA
+#define IGB_I350_VF_DEV_ID 0x1520
+#define IGB_MAX_UTA_ENTRIES 128
+#define MAX_EMULATION_MAC_ADDRS 16
+#define OUI_LEN 3
+#define IGB_MAX_VMDQ_QUEUES 8
+
+
+struct vf_data_storage {
+ unsigned char vf_mac_addresses[ETH_ALEN];
+ u16 vf_mc_hashes[IGB_MAX_VF_MC_ENTRIES];
+ u16 num_vf_mc_hashes;
+ u16 default_vf_vlan_id;
+ u16 vlans_enabled;
+ unsigned char em_mac_addresses[MAX_EMULATION_MAC_ADDRS * ETH_ALEN];
+ u32 uta_table_copy[IGB_MAX_UTA_ENTRIES];
+ u32 flags;
+ unsigned long last_nack;
+#ifdef IFLA_VF_MAX
+ u16 pf_vlan; /* When set, guest VLAN config not allowed. */
+ u16 pf_qos;
+ u16 tx_rate;
+#ifdef HAVE_VF_SPOOFCHK_CONFIGURE
+ bool spoofchk_enabled;
+#endif
+#endif
+};
+
+#define IGB_VF_FLAG_CTS 0x00000001 /* VF is clear to send data */
+#define IGB_VF_FLAG_UNI_PROMISC 0x00000002 /* VF has unicast promisc */
+#define IGB_VF_FLAG_MULTI_PROMISC 0x00000004 /* VF has multicast promisc */
+#define IGB_VF_FLAG_PF_SET_MAC 0x00000008 /* PF has set MAC address */
+
+/* RX descriptor control thresholds.
+ * PTHRESH - MAC will consider prefetch if it has fewer than this number of
+ * descriptors available in its onboard memory.
+ * Setting this to 0 disables RX descriptor prefetch.
+ * HTHRESH - MAC will only prefetch if there are at least this many descriptors
+ * available in host memory.
+ * If PTHRESH is 0, this should also be 0.
+ * WTHRESH - RX descriptor writeback threshold - MAC will delay writing back
+ * descriptors until either it has this many to write back, or the
+ * ITR timer expires.
+ */
+#define IGB_RX_PTHRESH ((hw->mac.type == e1000_i354) ? 12 : 8)
+#define IGB_RX_HTHRESH 8
+#define IGB_TX_PTHRESH ((hw->mac.type == e1000_i354) ? 20 : 8)
+#define IGB_TX_HTHRESH 1
+#define IGB_RX_WTHRESH ((hw->mac.type == e1000_82576 && \
+ adapter->msix_entries) ? 1 : 4)
+
+/* this is the size past which hardware will drop packets when setting LPE=0 */
+#define MAXIMUM_ETHERNET_VLAN_SIZE 1522
+
+/* NOTE: netdev_alloc_skb reserves 16 bytes, NET_IP_ALIGN means we
+ * reserve 2 more, and skb_shared_info adds an additional 384 more,
+ * this adds roughly 448 bytes of extra data meaning the smallest
+ * allocation we could have is 1K.
+ * i.e. RXBUFFER_512 --> size-1024 slab
+ */
+/* Supported Rx Buffer Sizes */
+#define IGB_RXBUFFER_256 256
+#define IGB_RXBUFFER_2048 2048
+#define IGB_RXBUFFER_16384 16384
+#define IGB_RX_HDR_LEN IGB_RXBUFFER_256
+#if MAX_SKB_FRAGS < 8
+#define IGB_RX_BUFSZ ALIGN(MAX_JUMBO_FRAME_SIZE / MAX_SKB_FRAGS, 1024)
+#else
+#define IGB_RX_BUFSZ IGB_RXBUFFER_2048
+#endif
+
+
+/* Packet Buffer allocations */
+#define IGB_PBA_BYTES_SHIFT 0xA
+#define IGB_TX_HEAD_ADDR_SHIFT 7
+#define IGB_PBA_TX_MASK 0xFFFF0000
+
+#define IGB_FC_PAUSE_TIME 0x0680 /* 858 usec */
+
+/* How many Rx Buffers do we bundle into one write to the hardware ? */
+#define IGB_RX_BUFFER_WRITE 16 /* Must be power of 2 */
+
+#define IGB_EEPROM_APME 0x0400
+#define AUTO_ALL_MODES 0
+
+#ifndef IGB_MASTER_SLAVE
+/* Switch to override PHY master/slave setting */
+#define IGB_MASTER_SLAVE e1000_ms_hw_default
+#endif
+
+#define IGB_MNG_VLAN_NONE -1
+
+#ifndef IGB_NO_LRO
+#define IGB_LRO_MAX 32 /*Maximum number of LRO descriptors*/
+struct igb_lro_stats {
+ u32 flushed;
+ u32 coal;
+};
+
+/*
+ * igb_lro_header - header format to be aggregated by LRO
+ * @iph: IP header without options
+ * @tcp: TCP header
+ * @ts: Optional TCP timestamp data in TCP options
+ *
+ * This structure relies on the check above that verifies that the header
+ * is IPv4 and does not contain any options.
+ */
+struct igb_lrohdr {
+ struct iphdr iph;
+ struct tcphdr th;
+ __be32 ts[0];
+};
+
+struct igb_lro_list {
+ struct sk_buff_head active;
+ struct igb_lro_stats stats;
+};
+
+#endif /* IGB_NO_LRO */
+struct igb_cb {
+#ifndef IGB_NO_LRO
+#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
+ union { /* Union defining head/tail partner */
+ struct sk_buff *head;
+ struct sk_buff *tail;
+ };
+#endif
+ __be32 tsecr; /* timestamp echo response */
+ u32 tsval; /* timestamp value in host order */
+ u32 next_seq; /* next expected sequence number */
+ u16 free; /* 65521 minus total size */
+ u16 mss; /* size of data portion of packet */
+ u16 append_cnt; /* number of skb's appended */
+#endif /* IGB_NO_LRO */
+#ifdef HAVE_VLAN_RX_REGISTER
+ u16 vid; /* VLAN tag */
+#endif
+};
+#define IGB_CB(skb) ((struct igb_cb *)(skb)->cb)
+
+enum igb_tx_flags {
+ /* cmd_type flags */
+ IGB_TX_FLAGS_VLAN = 0x01,
+ IGB_TX_FLAGS_TSO = 0x02,
+ IGB_TX_FLAGS_TSTAMP = 0x04,
+
+ /* olinfo flags */
+ IGB_TX_FLAGS_IPV4 = 0x10,
+ IGB_TX_FLAGS_CSUM = 0x20,
+};
+
+/* VLAN info */
+#define IGB_TX_FLAGS_VLAN_MASK 0xffff0000
+#define IGB_TX_FLAGS_VLAN_SHIFT 16
+
+/*
+ * The largest size we can write to the descriptor is 65535. In order to
+ * maintain a power of two alignment we have to limit ourselves to 32K.
+ */
+#define IGB_MAX_TXD_PWR 15
+#define IGB_MAX_DATA_PER_TXD (1 << IGB_MAX_TXD_PWR)
+
+/* Tx Descriptors needed, worst case */
+#define TXD_USE_COUNT(S) DIV_ROUND_UP((S), IGB_MAX_DATA_PER_TXD)
+#ifndef MAX_SKB_FRAGS
+#define DESC_NEEDED 4
+#elif (MAX_SKB_FRAGS < 16)
+#define DESC_NEEDED ((MAX_SKB_FRAGS * TXD_USE_COUNT(PAGE_SIZE)) + 4)
+#else
+#define DESC_NEEDED (MAX_SKB_FRAGS + 4)
+#endif
+
+/* wrapper around a pointer to a socket buffer,
+ * so a DMA handle can be stored along with the buffer */
+struct igb_tx_buffer {
+ union e1000_adv_tx_desc *next_to_watch;
+ unsigned long time_stamp;
+ struct sk_buff *skb;
+ unsigned int bytecount;
+ u16 gso_segs;
+ __be16 protocol;
+ DEFINE_DMA_UNMAP_ADDR(dma);
+ DEFINE_DMA_UNMAP_LEN(len);
+ u32 tx_flags;
+};
+
+struct igb_rx_buffer {
+ dma_addr_t dma;
+#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
+ struct sk_buff *skb;
+#else
+ struct page *page;
+ u32 page_offset;
+#endif
+};
+
+struct igb_tx_queue_stats {
+ u64 packets;
+ u64 bytes;
+ u64 restart_queue;
+};
+
+struct igb_rx_queue_stats {
+ u64 packets;
+ u64 bytes;
+ u64 drops;
+ u64 csum_err;
+ u64 alloc_failed;
+ u64 ipv4_packets; /* IPv4 headers processed */
+ u64 ipv4e_packets; /* IPv4E headers with extensions processed */
+ u64 ipv6_packets; /* IPv6 headers processed */
+ u64 ipv6e_packets; /* IPv6E headers with extensions processed */
+ u64 tcp_packets; /* TCP headers processed */
+ u64 udp_packets; /* UDP headers processed */
+ u64 sctp_packets; /* SCTP headers processed */
+ u64 nfs_packets; /* NFS headers processe */
+};
+
+struct igb_ring_container {
+ struct igb_ring *ring; /* pointer to linked list of rings */
+ unsigned int total_bytes; /* total bytes processed this int */
+ unsigned int total_packets; /* total packets processed this int */
+ u16 work_limit; /* total work allowed per interrupt */
+ u8 count; /* total number of rings in vector */
+ u8 itr; /* current ITR setting for ring */
+};
+
+struct igb_ring {
+ struct igb_q_vector *q_vector; /* backlink to q_vector */
+ struct net_device *netdev; /* back pointer to net_device */
+ struct device *dev; /* device for dma mapping */
+ union { /* array of buffer info structs */
+ struct igb_tx_buffer *tx_buffer_info;
+ struct igb_rx_buffer *rx_buffer_info;
+ };
+#ifdef HAVE_PTP_1588_CLOCK
+ unsigned long last_rx_timestamp;
+#endif /* HAVE_PTP_1588_CLOCK */
+ void *desc; /* descriptor ring memory */
+ unsigned long flags; /* ring specific flags */
+ void __iomem *tail; /* pointer to ring tail register */
+ dma_addr_t dma; /* phys address of the ring */
+ unsigned int size; /* length of desc. ring in bytes */
+
+ u16 count; /* number of desc. in the ring */
+ u8 queue_index; /* logical index of the ring*/
+ u8 reg_idx; /* physical index of the ring */
+
+ /* everything past this point are written often */
+ u16 next_to_clean;
+ u16 next_to_use;
+ u16 next_to_alloc;
+
+ union {
+ /* TX */
+ struct {
+ struct igb_tx_queue_stats tx_stats;
+ };
+ /* RX */
+ struct {
+ struct igb_rx_queue_stats rx_stats;
+#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
+ u16 rx_buffer_len;
+#else
+ struct sk_buff *skb;
+#endif
+ };
+ };
+#ifdef CONFIG_IGB_VMDQ_NETDEV
+ struct net_device *vmdq_netdev;
+ int vqueue_index; /* queue index for virtual netdev */
+#endif
+} ____cacheline_internodealigned_in_smp;
+
+struct igb_q_vector {
+ struct igb_adapter *adapter; /* backlink */
+ int cpu; /* CPU for DCA */
+ u32 eims_value; /* EIMS mask value */
+
+ u16 itr_val;
+ u8 set_itr;
+ void __iomem *itr_register;
+
+ struct igb_ring_container rx, tx;
+
+ struct napi_struct napi;
+#ifndef IGB_NO_LRO
+ struct igb_lro_list lrolist; /* LRO list for queue vector*/
+#endif
+ char name[IFNAMSIZ + 9];
+#ifndef HAVE_NETDEV_NAPI_LIST
+ struct net_device poll_dev;
+#endif
+
+ /* for dynamic allocation of rings associated with this q_vector */
+ struct igb_ring ring[0] ____cacheline_internodealigned_in_smp;
+};
+
+enum e1000_ring_flags_t {
+#ifndef HAVE_NDO_SET_FEATURES
+ IGB_RING_FLAG_RX_CSUM,
+#endif
+ IGB_RING_FLAG_RX_SCTP_CSUM,
+ IGB_RING_FLAG_RX_LB_VLAN_BSWAP,
+ IGB_RING_FLAG_TX_CTX_IDX,
+ IGB_RING_FLAG_TX_DETECT_HANG,
+};
+
+struct igb_mac_addr {
+ u8 addr[ETH_ALEN];
+ u16 queue;
+ u16 state; /* bitmask */
+};
+#define IGB_MAC_STATE_DEFAULT 0x1
+#define IGB_MAC_STATE_MODIFIED 0x2
+#define IGB_MAC_STATE_IN_USE 0x4
+
+#define IGB_TXD_DCMD (E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS)
+
+#define IGB_RX_DESC(R, i) \
+ (&(((union e1000_adv_rx_desc *)((R)->desc))[i]))
+#define IGB_TX_DESC(R, i) \
+ (&(((union e1000_adv_tx_desc *)((R)->desc))[i]))
+#define IGB_TX_CTXTDESC(R, i) \
+ (&(((struct e1000_adv_tx_context_desc *)((R)->desc))[i]))
+
+#ifdef CONFIG_IGB_VMDQ_NETDEV
+#define netdev_ring(ring) \
+ ((ring->vmdq_netdev ? ring->vmdq_netdev : ring->netdev))
+#define ring_queue_index(ring) \
+ ((ring->vmdq_netdev ? ring->vqueue_index : ring->queue_index))
+#else
+#define netdev_ring(ring) (ring->netdev)
+#define ring_queue_index(ring) (ring->queue_index)
+#endif /* CONFIG_IGB_VMDQ_NETDEV */
+
+/* igb_test_staterr - tests bits within Rx descriptor status and error fields */
+static inline __le32 igb_test_staterr(union e1000_adv_rx_desc *rx_desc,
+ const u32 stat_err_bits)
+{
+ return rx_desc->wb.upper.status_error & cpu_to_le32(stat_err_bits);
+}
+
+/* igb_desc_unused - calculate if we have unused descriptors */
+static inline u16 igb_desc_unused(const struct igb_ring *ring)
+{
+ u16 ntc = ring->next_to_clean;
+ u16 ntu = ring->next_to_use;
+
+ return ((ntc > ntu) ? 0 : ring->count) + ntc - ntu - 1;
+}
+
+#ifdef CONFIG_BQL
+static inline struct netdev_queue *txring_txq(const struct igb_ring *tx_ring)
+{
+ return netdev_get_tx_queue(tx_ring->netdev, tx_ring->queue_index);
+}
+#endif /* CONFIG_BQL */
+
+// #ifdef EXT_THERMAL_SENSOR_SUPPORT
+// #ifdef IGB_PROCFS
+struct igb_therm_proc_data
+{
+ struct e1000_hw *hw;
+ struct e1000_thermal_diode_data *sensor_data;
+};
+
+// #endif /* IGB_PROCFS */
+// #endif /* EXT_THERMAL_SENSOR_SUPPORT */
+
+#ifdef IGB_HWMON
+#define IGB_HWMON_TYPE_LOC 0
+#define IGB_HWMON_TYPE_TEMP 1
+#define IGB_HWMON_TYPE_CAUTION 2
+#define IGB_HWMON_TYPE_MAX 3
+
+struct hwmon_attr {
+ struct device_attribute dev_attr;
+ struct e1000_hw *hw;
+ struct e1000_thermal_diode_data *sensor;
+ char name[12];
+ };
+
+struct hwmon_buff {
+ struct device *device;
+ struct hwmon_attr *hwmon_list;
+ unsigned int n_hwmon;
+ };
+#endif /* IGB_HWMON */
+
+/* board specific private data structure */
+struct igb_adapter {
+#ifdef HAVE_VLAN_RX_REGISTER
+ /* vlgrp must be first member of structure */
+ struct vlan_group *vlgrp;
+#else
+ unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
+#endif
+ struct net_device *netdev;
+
+ unsigned long state;
+ unsigned int flags;
+
+ unsigned int num_q_vectors;
+ struct msix_entry *msix_entries;
+
+
+ /* TX */
+ u16 tx_work_limit;
+ u32 tx_timeout_count;
+ int num_tx_queues;
+ struct igb_ring *tx_ring[IGB_MAX_TX_QUEUES];
+
+ /* RX */
+ int num_rx_queues;
+ struct igb_ring *rx_ring[IGB_MAX_RX_QUEUES];
+
+ struct timer_list watchdog_timer;
+ struct timer_list dma_err_timer;
+ struct timer_list phy_info_timer;
+ u16 mng_vlan_id;
+ u32 bd_number;
+ u32 wol;
+ u32 en_mng_pt;
+ u16 link_speed;
+ u16 link_duplex;
+ u8 port_num;
+
+ /* Interrupt Throttle Rate */
+ u32 rx_itr_setting;
+ u32 tx_itr_setting;
+
+ struct work_struct reset_task;
+ struct work_struct watchdog_task;
+ struct work_struct dma_err_task;
+ bool fc_autoneg;
+ u8 tx_timeout_factor;
+
+#ifdef DEBUG
+ bool tx_hang_detected;
+ bool disable_hw_reset;
+#endif
+ u32 max_frame_size;
+
+ /* OS defined structs */
+ struct pci_dev *pdev;
+#ifndef HAVE_NETDEV_STATS_IN_NETDEV
+ struct net_device_stats net_stats;
+#endif
+#ifndef IGB_NO_LRO
+ struct igb_lro_stats lro_stats;
+#endif
+
+ /* structs defined in e1000_hw.h */
+ struct e1000_hw hw;
+ struct e1000_hw_stats stats;
+ struct e1000_phy_info phy_info;
+ struct e1000_phy_stats phy_stats;
+
+#ifdef ETHTOOL_TEST
+ u32 test_icr;
+ struct igb_ring test_tx_ring;
+ struct igb_ring test_rx_ring;
+#endif
+
+ int msg_enable;
+
+ struct igb_q_vector *q_vector[MAX_Q_VECTORS];
+ u32 eims_enable_mask;
+ u32 eims_other;
+
+ /* to not mess up cache alignment, always add to the bottom */
+ u32 *config_space;
+ u16 tx_ring_count;
+ u16 rx_ring_count;
+ struct vf_data_storage *vf_data;
+#ifdef IFLA_VF_MAX
+ int vf_rate_link_speed;
+#endif
+ u32 lli_port;
+ u32 lli_size;
+ unsigned int vfs_allocated_count;
+ /* Malicious Driver Detection flag. Valid only when SR-IOV is enabled */
+ bool mdd;
+ int int_mode;
+ u32 rss_queues;
+ u32 vmdq_pools;
+ char fw_version[32];
+ u32 wvbr;
+ struct igb_mac_addr *mac_table;
+#ifdef CONFIG_IGB_VMDQ_NETDEV
+ struct net_device *vmdq_netdev[IGB_MAX_VMDQ_QUEUES];
+#endif
+ int vferr_refcount;
+ int dmac;
+ u32 *shadow_vfta;
+
+ /* External Thermal Sensor support flag */
+ bool ets;
+#ifdef IGB_HWMON
+ struct hwmon_buff igb_hwmon_buff;
+#else /* IGB_HWMON */
+#ifdef IGB_PROCFS
+ struct proc_dir_entry *eth_dir;
+ struct proc_dir_entry *info_dir;
+ struct proc_dir_entry *therm_dir[E1000_MAX_SENSORS];
+ struct igb_therm_proc_data therm_data[E1000_MAX_SENSORS];
+ bool old_lsc;
+#endif /* IGB_PROCFS */
+#endif /* IGB_HWMON */
+ u32 etrack_id;
+
+#ifdef HAVE_PTP_1588_CLOCK
+ struct ptp_clock *ptp_clock;
+ struct ptp_clock_info ptp_caps;
+ struct delayed_work ptp_overflow_work;
+ struct work_struct ptp_tx_work;
+ struct sk_buff *ptp_tx_skb;
+ unsigned long ptp_tx_start;
+ unsigned long last_rx_ptp_check;
+ spinlock_t tmreg_lock;
+ struct cyclecounter cc;
+ struct timecounter tc;
+ u32 tx_hwtstamp_timeouts;
+ u32 rx_hwtstamp_cleared;
+#endif /* HAVE_PTP_1588_CLOCK */
+
+#ifdef HAVE_I2C_SUPPORT
+ struct i2c_algo_bit_data i2c_algo;
+ struct i2c_adapter i2c_adap;
+ struct i2c_client *i2c_client;
+#endif /* HAVE_I2C_SUPPORT */
+ unsigned long link_check_timeout;
+
+
+ int devrc;
+
+ int copper_tries;
+ u16 eee_advert;
+};
+
+#ifdef CONFIG_IGB_VMDQ_NETDEV
+struct igb_vmdq_adapter {
+#ifdef HAVE_VLAN_RX_REGISTER
+ /* vlgrp must be first member of structure */
+ struct vlan_group *vlgrp;
+#else
+ unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
+#endif
+ struct igb_adapter *real_adapter;
+ struct net_device *vnetdev;
+ struct net_device_stats net_stats;
+ struct igb_ring *tx_ring;
+ struct igb_ring *rx_ring;
+};
+#endif
+
+#define IGB_FLAG_HAS_MSI (1 << 0)
+#define IGB_FLAG_DCA_ENABLED (1 << 1)
+#define IGB_FLAG_LLI_PUSH (1 << 2)
+#define IGB_FLAG_QUAD_PORT_A (1 << 3)
+#define IGB_FLAG_QUEUE_PAIRS (1 << 4)
+#define IGB_FLAG_EEE (1 << 5)
+#define IGB_FLAG_DMAC (1 << 6)
+#define IGB_FLAG_DETECT_BAD_DMA (1 << 7)
+#define IGB_FLAG_PTP (1 << 8)
+#define IGB_FLAG_RSS_FIELD_IPV4_UDP (1 << 9)
+#define IGB_FLAG_RSS_FIELD_IPV6_UDP (1 << 10)
+#define IGB_FLAG_WOL_SUPPORTED (1 << 11)
+#define IGB_FLAG_NEED_LINK_UPDATE (1 << 12)
+#define IGB_FLAG_LOOPBACK_ENABLE (1 << 13)
+#define IGB_FLAG_MEDIA_RESET (1 << 14)
+#define IGB_FLAG_MAS_ENABLE (1 << 15)
+
+/* Media Auto Sense */
+#define IGB_MAS_ENABLE_0 0X0001
+#define IGB_MAS_ENABLE_1 0X0002
+#define IGB_MAS_ENABLE_2 0X0004
+#define IGB_MAS_ENABLE_3 0X0008
+
+#define IGB_MIN_TXPBSIZE 20408
+#define IGB_TX_BUF_4096 4096
+
+#define IGB_DMCTLX_DCFLUSH_DIS 0x80000000 /* Disable DMA Coal Flush */
+
+/* DMA Coalescing defines */
+#define IGB_DMAC_DISABLE 0
+#define IGB_DMAC_MIN 250
+#define IGB_DMAC_500 500
+#define IGB_DMAC_EN_DEFAULT 1000
+#define IGB_DMAC_2000 2000
+#define IGB_DMAC_3000 3000
+#define IGB_DMAC_4000 4000
+#define IGB_DMAC_5000 5000
+#define IGB_DMAC_6000 6000
+#define IGB_DMAC_7000 7000
+#define IGB_DMAC_8000 8000
+#define IGB_DMAC_9000 9000
+#define IGB_DMAC_MAX 10000
+
+#define IGB_82576_TSYNC_SHIFT 19
+#define IGB_82580_TSYNC_SHIFT 24
+#define IGB_TS_HDR_LEN 16
+
+/* CEM Support */
+#define FW_HDR_LEN 0x4
+#define FW_CMD_DRV_INFO 0xDD
+#define FW_CMD_DRV_INFO_LEN 0x5
+#define FW_CMD_RESERVED 0X0
+#define FW_RESP_SUCCESS 0x1
+#define FW_UNUSED_VER 0x0
+#define FW_MAX_RETRIES 3
+#define FW_STATUS_SUCCESS 0x1
+#define FW_FAMILY_DRV_VER 0Xffffffff
+
+#define IGB_MAX_LINK_TRIES 20
+
+struct e1000_fw_hdr {
+ u8 cmd;
+ u8 buf_len;
+ union
+ {
+ u8 cmd_resv;
+ u8 ret_status;
+ } cmd_or_resp;
+ u8 checksum;
+};
+
+#pragma pack(push,1)
+struct e1000_fw_drv_info {
+ struct e1000_fw_hdr hdr;
+ u8 port_num;
+ u32 drv_version;
+ u16 pad; /* end spacing to ensure length is mult. of dword */
+ u8 pad2; /* end spacing to ensure length is mult. of dword2 */
+};
+#pragma pack(pop)
+
+enum e1000_state_t {
+ __IGB_TESTING,
+ __IGB_RESETTING,
+ __IGB_DOWN
+};
+
+extern char igb_driver_name[];
+extern char igb_driver_version[];
+
+extern int igb_up(struct igb_adapter *);
+extern void igb_down(struct igb_adapter *);
+extern void igb_reinit_locked(struct igb_adapter *);
+extern void igb_reset(struct igb_adapter *);
+extern int igb_set_spd_dplx(struct igb_adapter *, u16);
+extern int igb_setup_tx_resources(struct igb_ring *);
+extern int igb_setup_rx_resources(struct igb_ring *);
+extern void igb_free_tx_resources(struct igb_ring *);
+extern void igb_free_rx_resources(struct igb_ring *);
+extern void igb_configure_tx_ring(struct igb_adapter *, struct igb_ring *);
+extern void igb_configure_rx_ring(struct igb_adapter *, struct igb_ring *);
+extern void igb_setup_tctl(struct igb_adapter *);
+extern void igb_setup_rctl(struct igb_adapter *);
+extern netdev_tx_t igb_xmit_frame_ring(struct sk_buff *, struct igb_ring *);
+extern void igb_unmap_and_free_tx_resource(struct igb_ring *,
+ struct igb_tx_buffer *);
+extern void igb_alloc_rx_buffers(struct igb_ring *, u16);
+extern void igb_clean_rx_ring(struct igb_ring *);
+extern void igb_update_stats(struct igb_adapter *);
+extern bool igb_has_link(struct igb_adapter *adapter);
+extern void igb_set_ethtool_ops(struct net_device *);
+extern void igb_check_options(struct igb_adapter *);
+extern void igb_power_up_link(struct igb_adapter *);
+#ifdef HAVE_PTP_1588_CLOCK
+extern void igb_ptp_init(struct igb_adapter *adapter);
+extern void igb_ptp_stop(struct igb_adapter *adapter);
+extern void igb_ptp_reset(struct igb_adapter *adapter);
+extern void igb_ptp_tx_work(struct work_struct *work);
+extern void igb_ptp_rx_hang(struct igb_adapter *adapter);
+extern void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter);
+extern void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector,
+ struct sk_buff *skb);
+extern void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector,
+ unsigned char *va,
+ struct sk_buff *skb);
+static inline void igb_ptp_rx_hwtstamp(struct igb_ring *rx_ring,
+ union e1000_adv_rx_desc *rx_desc,
+ struct sk_buff *skb)
+{
+ if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
+#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
+ igb_ptp_rx_pktstamp(rx_ring->q_vector, skb->data, skb);
+ skb_pull(skb, IGB_TS_HDR_LEN);
+#endif
+ return;
+ }
+
+ if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TS))
+ igb_ptp_rx_rgtstamp(rx_ring->q_vector, skb);
+
+ /* Update the last_rx_timestamp timer in order to enable watchdog check
+ * for error case of latched timestamp on a dropped packet.
+ */
+ rx_ring->last_rx_timestamp = jiffies;
+}
+
+extern int igb_ptp_hwtstamp_ioctl(struct net_device *netdev,
+ struct ifreq *ifr, int cmd);
+#endif /* HAVE_PTP_1588_CLOCK */
+#ifdef ETHTOOL_OPS_COMPAT
+extern int ethtool_ioctl(struct ifreq *);
+#endif
+extern int igb_write_mc_addr_list(struct net_device *netdev);
+extern int igb_add_mac_filter(struct igb_adapter *adapter, u8 *addr, u16 queue);
+extern int igb_del_mac_filter(struct igb_adapter *adapter, u8* addr, u16 queue);
+extern int igb_available_rars(struct igb_adapter *adapter);
+extern s32 igb_vlvf_set(struct igb_adapter *, u32, bool, u32);
+extern void igb_configure_vt_default_pool(struct igb_adapter *adapter);
+extern void igb_enable_vlan_tags(struct igb_adapter *adapter);
+#ifndef HAVE_VLAN_RX_REGISTER
+extern void igb_vlan_mode(struct net_device *, u32);
+#endif
+
+#define E1000_PCS_CFG_IGN_SD 1
+
+#ifdef IGB_HWMON
+void igb_sysfs_exit(struct igb_adapter *adapter);
+int igb_sysfs_init(struct igb_adapter *adapter);
+#else
+#ifdef IGB_PROCFS
+int igb_procfs_init(struct igb_adapter* adapter);
+void igb_procfs_exit(struct igb_adapter* adapter);
+int igb_procfs_topdir_init(void);
+void igb_procfs_topdir_exit(void);
+#endif /* IGB_PROCFS */
+#endif /* IGB_HWMON */
+
+
+
+#endif /* _IGB_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_debugfs.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_debugfs.c
new file mode 100644
index 00000000..c07f9f53
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_debugfs.c
@@ -0,0 +1,28 @@
+/*******************************************************************************
+
+ Intel(R) Gigabit Ethernet Linux driver
+ Copyright(c) 2007-2013 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#include "igb.h"
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c
new file mode 100644
index 00000000..af7e68a5
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c
@@ -0,0 +1,2857 @@
+/*******************************************************************************
+
+ Intel(R) Gigabit Ethernet Linux driver
+ Copyright(c) 2007-2013 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+/* ethtool support for igb */
+
+#include <linux/netdevice.h>
+#include <linux/vmalloc.h>
+
+#ifdef SIOCETHTOOL
+#include <linux/ethtool.h>
+#ifdef CONFIG_PM_RUNTIME
+#include <linux/pm_runtime.h>
+#endif /* CONFIG_PM_RUNTIME */
+#include <linux/highmem.h>
+
+#include "igb.h"
+#include "igb_regtest.h"
+#include <linux/if_vlan.h>
+#ifdef ETHTOOL_GEEE
+#include <linux/mdio.h>
+#endif
+
+#ifdef ETHTOOL_OPS_COMPAT
+#include "kcompat_ethtool.c"
+#endif
+#ifdef ETHTOOL_GSTATS
+struct igb_stats {
+ char stat_string[ETH_GSTRING_LEN];
+ int sizeof_stat;
+ int stat_offset;
+};
+
+#define IGB_STAT(_name, _stat) { \
+ .stat_string = _name, \
+ .sizeof_stat = FIELD_SIZEOF(struct igb_adapter, _stat), \
+ .stat_offset = offsetof(struct igb_adapter, _stat) \
+}
+static const struct igb_stats igb_gstrings_stats[] = {
+ IGB_STAT("rx_packets", stats.gprc),
+ IGB_STAT("tx_packets", stats.gptc),
+ IGB_STAT("rx_bytes", stats.gorc),
+ IGB_STAT("tx_bytes", stats.gotc),
+ IGB_STAT("rx_broadcast", stats.bprc),
+ IGB_STAT("tx_broadcast", stats.bptc),
+ IGB_STAT("rx_multicast", stats.mprc),
+ IGB_STAT("tx_multicast", stats.mptc),
+ IGB_STAT("multicast", stats.mprc),
+ IGB_STAT("collisions", stats.colc),
+ IGB_STAT("rx_crc_errors", stats.crcerrs),
+ IGB_STAT("rx_no_buffer_count", stats.rnbc),
+ IGB_STAT("rx_missed_errors", stats.mpc),
+ IGB_STAT("tx_aborted_errors", stats.ecol),
+ IGB_STAT("tx_carrier_errors", stats.tncrs),
+ IGB_STAT("tx_window_errors", stats.latecol),
+ IGB_STAT("tx_abort_late_coll", stats.latecol),
+ IGB_STAT("tx_deferred_ok", stats.dc),
+ IGB_STAT("tx_single_coll_ok", stats.scc),
+ IGB_STAT("tx_multi_coll_ok", stats.mcc),
+ IGB_STAT("tx_timeout_count", tx_timeout_count),
+ IGB_STAT("rx_long_length_errors", stats.roc),
+ IGB_STAT("rx_short_length_errors", stats.ruc),
+ IGB_STAT("rx_align_errors", stats.algnerrc),
+ IGB_STAT("tx_tcp_seg_good", stats.tsctc),
+ IGB_STAT("tx_tcp_seg_failed", stats.tsctfc),
+ IGB_STAT("rx_flow_control_xon", stats.xonrxc),
+ IGB_STAT("rx_flow_control_xoff", stats.xoffrxc),
+ IGB_STAT("tx_flow_control_xon", stats.xontxc),
+ IGB_STAT("tx_flow_control_xoff", stats.xofftxc),
+ IGB_STAT("rx_long_byte_count", stats.gorc),
+ IGB_STAT("tx_dma_out_of_sync", stats.doosync),
+#ifndef IGB_NO_LRO
+ IGB_STAT("lro_aggregated", lro_stats.coal),
+ IGB_STAT("lro_flushed", lro_stats.flushed),
+#endif /* IGB_LRO */
+ IGB_STAT("tx_smbus", stats.mgptc),
+ IGB_STAT("rx_smbus", stats.mgprc),
+ IGB_STAT("dropped_smbus", stats.mgpdc),
+ IGB_STAT("os2bmc_rx_by_bmc", stats.o2bgptc),
+ IGB_STAT("os2bmc_tx_by_bmc", stats.b2ospc),
+ IGB_STAT("os2bmc_tx_by_host", stats.o2bspc),
+ IGB_STAT("os2bmc_rx_by_host", stats.b2ogprc),
+#ifdef HAVE_PTP_1588_CLOCK
+ IGB_STAT("tx_hwtstamp_timeouts", tx_hwtstamp_timeouts),
+ IGB_STAT("rx_hwtstamp_cleared", rx_hwtstamp_cleared),
+#endif /* HAVE_PTP_1588_CLOCK */
+};
+
+#define IGB_NETDEV_STAT(_net_stat) { \
+ .stat_string = #_net_stat, \
+ .sizeof_stat = FIELD_SIZEOF(struct net_device_stats, _net_stat), \
+ .stat_offset = offsetof(struct net_device_stats, _net_stat) \
+}
+static const struct igb_stats igb_gstrings_net_stats[] = {
+ IGB_NETDEV_STAT(rx_errors),
+ IGB_NETDEV_STAT(tx_errors),
+ IGB_NETDEV_STAT(tx_dropped),
+ IGB_NETDEV_STAT(rx_length_errors),
+ IGB_NETDEV_STAT(rx_over_errors),
+ IGB_NETDEV_STAT(rx_frame_errors),
+ IGB_NETDEV_STAT(rx_fifo_errors),
+ IGB_NETDEV_STAT(tx_fifo_errors),
+ IGB_NETDEV_STAT(tx_heartbeat_errors)
+};
+
+#define IGB_GLOBAL_STATS_LEN ARRAY_SIZE(igb_gstrings_stats)
+#define IGB_NETDEV_STATS_LEN ARRAY_SIZE(igb_gstrings_net_stats)
+#define IGB_RX_QUEUE_STATS_LEN \
+ (sizeof(struct igb_rx_queue_stats) / sizeof(u64))
+#define IGB_TX_QUEUE_STATS_LEN \
+ (sizeof(struct igb_tx_queue_stats) / sizeof(u64))
+#define IGB_QUEUE_STATS_LEN \
+ ((((struct igb_adapter *)netdev_priv(netdev))->num_rx_queues * \
+ IGB_RX_QUEUE_STATS_LEN) + \
+ (((struct igb_adapter *)netdev_priv(netdev))->num_tx_queues * \
+ IGB_TX_QUEUE_STATS_LEN))
+#define IGB_STATS_LEN \
+ (IGB_GLOBAL_STATS_LEN + IGB_NETDEV_STATS_LEN + IGB_QUEUE_STATS_LEN)
+
+#endif /* ETHTOOL_GSTATS */
+#ifdef ETHTOOL_TEST
+static const char igb_gstrings_test[][ETH_GSTRING_LEN] = {
+ "Register test (offline)", "Eeprom test (offline)",
+ "Interrupt test (offline)", "Loopback test (offline)",
+ "Link test (on/offline)"
+};
+#define IGB_TEST_LEN (sizeof(igb_gstrings_test) / ETH_GSTRING_LEN)
+#endif /* ETHTOOL_TEST */
+
+static int igb_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+ struct e1000_hw *hw = &adapter->hw;
+ u32 status;
+
+ if (hw->phy.media_type == e1000_media_type_copper) {
+
+ ecmd->supported = (SUPPORTED_10baseT_Half |
+ SUPPORTED_10baseT_Full |
+ SUPPORTED_100baseT_Half |
+ SUPPORTED_100baseT_Full |
+ SUPPORTED_1000baseT_Full|
+ SUPPORTED_Autoneg |
+ SUPPORTED_TP |
+ SUPPORTED_Pause);
+ ecmd->advertising = ADVERTISED_TP;
+
+ if (hw->mac.autoneg == 1) {
+ ecmd->advertising |= ADVERTISED_Autoneg;
+ /* the e1000 autoneg seems to match ethtool nicely */
+ ecmd->advertising |= hw->phy.autoneg_advertised;
+ }
+
+ ecmd->port = PORT_TP;
+ ecmd->phy_address = hw->phy.addr;
+ ecmd->transceiver = XCVR_INTERNAL;
+
+ } else {
+ ecmd->supported = (SUPPORTED_1000baseT_Full |
+ SUPPORTED_100baseT_Full |
+ SUPPORTED_FIBRE |
+ SUPPORTED_Autoneg |
+ SUPPORTED_Pause);
+ if (hw->mac.type == e1000_i354)
+ ecmd->supported |= (SUPPORTED_2500baseX_Full);
+
+ ecmd->advertising = ADVERTISED_FIBRE;
+
+ switch (adapter->link_speed) {
+ case SPEED_2500:
+ ecmd->advertising = ADVERTISED_2500baseX_Full;
+ break;
+ case SPEED_1000:
+ ecmd->advertising = ADVERTISED_1000baseT_Full;
+ break;
+ case SPEED_100:
+ ecmd->advertising = ADVERTISED_100baseT_Full;
+ break;
+ default:
+ break;
+ }
+
+ if (hw->mac.autoneg == 1)
+ ecmd->advertising |= ADVERTISED_Autoneg;
+
+ ecmd->port = PORT_FIBRE;
+ ecmd->transceiver = XCVR_EXTERNAL;
+ }
+
+ if (hw->mac.autoneg != 1)
+ ecmd->advertising &= ~(ADVERTISED_Pause |
+ ADVERTISED_Asym_Pause);
+
+ if (hw->fc.requested_mode == e1000_fc_full)
+ ecmd->advertising |= ADVERTISED_Pause;
+ else if (hw->fc.requested_mode == e1000_fc_rx_pause)
+ ecmd->advertising |= (ADVERTISED_Pause |
+ ADVERTISED_Asym_Pause);
+ else if (hw->fc.requested_mode == e1000_fc_tx_pause)
+ ecmd->advertising |= ADVERTISED_Asym_Pause;
+ else
+ ecmd->advertising &= ~(ADVERTISED_Pause |
+ ADVERTISED_Asym_Pause);
+
+ status = E1000_READ_REG(hw, E1000_STATUS);
+
+ if (status & E1000_STATUS_LU) {
+ if ((hw->mac.type == e1000_i354) &&
+ (status & E1000_STATUS_2P5_SKU) &&
+ !(status & E1000_STATUS_2P5_SKU_OVER))
+ ecmd->speed = SPEED_2500;
+ else if (status & E1000_STATUS_SPEED_1000)
+ ecmd->speed = SPEED_1000;
+ else if (status & E1000_STATUS_SPEED_100)
+ ecmd->speed = SPEED_100;
+ else
+ ecmd->speed = SPEED_10;
+
+ if ((status & E1000_STATUS_FD) ||
+ hw->phy.media_type != e1000_media_type_copper)
+ ecmd->duplex = DUPLEX_FULL;
+ else
+ ecmd->duplex = DUPLEX_HALF;
+
+ } else {
+ ecmd->speed = -1;
+ ecmd->duplex = -1;
+ }
+
+ if ((hw->phy.media_type == e1000_media_type_fiber) ||
+ hw->mac.autoneg)
+ ecmd->autoneg = AUTONEG_ENABLE;
+ else
+ ecmd->autoneg = AUTONEG_DISABLE;
+#ifdef ETH_TP_MDI_X
+
+ /* MDI-X => 2; MDI =>1; Invalid =>0 */
+ if (hw->phy.media_type == e1000_media_type_copper)
+ ecmd->eth_tp_mdix = hw->phy.is_mdix ? ETH_TP_MDI_X :
+ ETH_TP_MDI;
+ else
+ ecmd->eth_tp_mdix = ETH_TP_MDI_INVALID;
+
+#ifdef ETH_TP_MDI_AUTO
+ if (hw->phy.mdix == AUTO_ALL_MODES)
+ ecmd->eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO;
+ else
+ ecmd->eth_tp_mdix_ctrl = hw->phy.mdix;
+
+#endif
+#endif /* ETH_TP_MDI_X */
+ return 0;
+}
+
+static int igb_set_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+ struct e1000_hw *hw = &adapter->hw;
+
+ if (ecmd->duplex == DUPLEX_HALF) {
+ if (!hw->dev_spec._82575.eee_disable)
+ dev_info(pci_dev_to_dev(adapter->pdev), "EEE disabled: not supported with half duplex\n");
+ hw->dev_spec._82575.eee_disable = true;
+ } else {
+ if (hw->dev_spec._82575.eee_disable)
+ dev_info(pci_dev_to_dev(adapter->pdev), "EEE enabled\n");
+ hw->dev_spec._82575.eee_disable = false;
+ }
+
+ /* When SoL/IDER sessions are active, autoneg/speed/duplex
+ * cannot be changed */
+ if (e1000_check_reset_block(hw)) {
+ dev_err(pci_dev_to_dev(adapter->pdev), "Cannot change link "
+ "characteristics when SoL/IDER is active.\n");
+ return -EINVAL;
+ }
+
+#ifdef ETH_TP_MDI_AUTO
+ /*
+ * MDI setting is only allowed when autoneg enabled because
+ * some hardware doesn't allow MDI setting when speed or
+ * duplex is forced.
+ */
+ if (ecmd->eth_tp_mdix_ctrl) {
+ if (hw->phy.media_type != e1000_media_type_copper)
+ return -EOPNOTSUPP;
+
+ if ((ecmd->eth_tp_mdix_ctrl != ETH_TP_MDI_AUTO) &&
+ (ecmd->autoneg != AUTONEG_ENABLE)) {
+ dev_err(&adapter->pdev->dev, "forcing MDI/MDI-X state is not supported when link speed and/or duplex are forced\n");
+ return -EINVAL;
+ }
+ }
+
+#endif /* ETH_TP_MDI_AUTO */
+ while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
+ usleep_range(1000, 2000);
+
+ if (ecmd->autoneg == AUTONEG_ENABLE) {
+ hw->mac.autoneg = 1;
+ if (hw->phy.media_type == e1000_media_type_fiber) {
+ hw->phy.autoneg_advertised = ecmd->advertising |
+ ADVERTISED_FIBRE |
+ ADVERTISED_Autoneg;
+ switch (adapter->link_speed) {
+ case SPEED_2500:
+ hw->phy.autoneg_advertised =
+ ADVERTISED_2500baseX_Full;
+ break;
+ case SPEED_1000:
+ hw->phy.autoneg_advertised =
+ ADVERTISED_1000baseT_Full;
+ break;
+ case SPEED_100:
+ hw->phy.autoneg_advertised =
+ ADVERTISED_100baseT_Full;
+ break;
+ default:
+ break;
+ }
+ } else {
+ hw->phy.autoneg_advertised = ecmd->advertising |
+ ADVERTISED_TP |
+ ADVERTISED_Autoneg;
+ }
+ ecmd->advertising = hw->phy.autoneg_advertised;
+ if (adapter->fc_autoneg)
+ hw->fc.requested_mode = e1000_fc_default;
+ } else {
+ if (igb_set_spd_dplx(adapter, ecmd->speed + ecmd->duplex)) {
+ clear_bit(__IGB_RESETTING, &adapter->state);
+ return -EINVAL;
+ }
+ }
+
+#ifdef ETH_TP_MDI_AUTO
+ /* MDI-X => 2; MDI => 1; Auto => 3 */
+ if (ecmd->eth_tp_mdix_ctrl) {
+ /* fix up the value for auto (3 => 0) as zero is mapped
+ * internally to auto
+ */
+ if (ecmd->eth_tp_mdix_ctrl == ETH_TP_MDI_AUTO)
+ hw->phy.mdix = AUTO_ALL_MODES;
+ else
+ hw->phy.mdix = ecmd->eth_tp_mdix_ctrl;
+ }
+
+#endif /* ETH_TP_MDI_AUTO */
+ /* reset the link */
+ if (netif_running(adapter->netdev)) {
+ igb_down(adapter);
+ igb_up(adapter);
+ } else
+ igb_reset(adapter);
+
+ clear_bit(__IGB_RESETTING, &adapter->state);
+ return 0;
+}
+
+static u32 igb_get_link(struct net_device *netdev)
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+ struct e1000_mac_info *mac = &adapter->hw.mac;
+
+ /*
+ * If the link is not reported up to netdev, interrupts are disabled,
+ * and so the physical link state may have changed since we last
+ * looked. Set get_link_status to make sure that the true link
+ * state is interrogated, rather than pulling a cached and possibly
+ * stale link state from the driver.
+ */
+ if (!netif_carrier_ok(netdev))
+ mac->get_link_status = 1;
+
+ return igb_has_link(adapter);
+}
+
+static void igb_get_pauseparam(struct net_device *netdev,
+ struct ethtool_pauseparam *pause)
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+ struct e1000_hw *hw = &adapter->hw;
+
+ pause->autoneg =
+ (adapter->fc_autoneg ? AUTONEG_ENABLE : AUTONEG_DISABLE);
+
+ if (hw->fc.current_mode == e1000_fc_rx_pause)
+ pause->rx_pause = 1;
+ else if (hw->fc.current_mode == e1000_fc_tx_pause)
+ pause->tx_pause = 1;
+ else if (hw->fc.current_mode == e1000_fc_full) {
+ pause->rx_pause = 1;
+ pause->tx_pause = 1;
+ }
+}
+
+static int igb_set_pauseparam(struct net_device *netdev,
+ struct ethtool_pauseparam *pause)
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+ struct e1000_hw *hw = &adapter->hw;
+ int retval = 0;
+
+ adapter->fc_autoneg = pause->autoneg;
+
+ while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
+ usleep_range(1000, 2000);
+
+ if (adapter->fc_autoneg == AUTONEG_ENABLE) {
+ hw->fc.requested_mode = e1000_fc_default;
+ if (netif_running(adapter->netdev)) {
+ igb_down(adapter);
+ igb_up(adapter);
+ } else {
+ igb_reset(adapter);
+ }
+ } else {
+ if (pause->rx_pause && pause->tx_pause)
+ hw->fc.requested_mode = e1000_fc_full;
+ else if (pause->rx_pause && !pause->tx_pause)
+ hw->fc.requested_mode = e1000_fc_rx_pause;
+ else if (!pause->rx_pause && pause->tx_pause)
+ hw->fc.requested_mode = e1000_fc_tx_pause;
+ else if (!pause->rx_pause && !pause->tx_pause)
+ hw->fc.requested_mode = e1000_fc_none;
+
+ hw->fc.current_mode = hw->fc.requested_mode;
+
+ if (hw->phy.media_type == e1000_media_type_fiber) {
+ retval = hw->mac.ops.setup_link(hw);
+ /* implicit goto out */
+ } else {
+ retval = e1000_force_mac_fc(hw);
+ if (retval)
+ goto out;
+ e1000_set_fc_watermarks_generic(hw);
+ }
+ }
+
+out:
+ clear_bit(__IGB_RESETTING, &adapter->state);
+ return retval;
+}
+
+static u32 igb_get_msglevel(struct net_device *netdev)
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+ return adapter->msg_enable;
+}
+
+static void igb_set_msglevel(struct net_device *netdev, u32 data)
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+ adapter->msg_enable = data;
+}
+
+static int igb_get_regs_len(struct net_device *netdev)
+{
+#define IGB_REGS_LEN 555
+ return IGB_REGS_LEN * sizeof(u32);
+}
+
+static void igb_get_regs(struct net_device *netdev,
+ struct ethtool_regs *regs, void *p)
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+ struct e1000_hw *hw = &adapter->hw;
+ u32 *regs_buff = p;
+ u8 i;
+
+ memset(p, 0, IGB_REGS_LEN * sizeof(u32));
+
+ regs->version = (1 << 24) | (hw->revision_id << 16) | hw->device_id;
+
+ /* General Registers */
+ regs_buff[0] = E1000_READ_REG(hw, E1000_CTRL);
+ regs_buff[1] = E1000_READ_REG(hw, E1000_STATUS);
+ regs_buff[2] = E1000_READ_REG(hw, E1000_CTRL_EXT);
+ regs_buff[3] = E1000_READ_REG(hw, E1000_MDIC);
+ regs_buff[4] = E1000_READ_REG(hw, E1000_SCTL);
+ regs_buff[5] = E1000_READ_REG(hw, E1000_CONNSW);
+ regs_buff[6] = E1000_READ_REG(hw, E1000_VET);
+ regs_buff[7] = E1000_READ_REG(hw, E1000_LEDCTL);
+ regs_buff[8] = E1000_READ_REG(hw, E1000_PBA);
+ regs_buff[9] = E1000_READ_REG(hw, E1000_PBS);
+ regs_buff[10] = E1000_READ_REG(hw, E1000_FRTIMER);
+ regs_buff[11] = E1000_READ_REG(hw, E1000_TCPTIMER);
+
+ /* NVM Register */
+ regs_buff[12] = E1000_READ_REG(hw, E1000_EECD);
+
+ /* Interrupt */
+ /* Reading EICS for EICR because they read the
+ * same but EICS does not clear on read */
+ regs_buff[13] = E1000_READ_REG(hw, E1000_EICS);
+ regs_buff[14] = E1000_READ_REG(hw, E1000_EICS);
+ regs_buff[15] = E1000_READ_REG(hw, E1000_EIMS);
+ regs_buff[16] = E1000_READ_REG(hw, E1000_EIMC);
+ regs_buff[17] = E1000_READ_REG(hw, E1000_EIAC);
+ regs_buff[18] = E1000_READ_REG(hw, E1000_EIAM);
+ /* Reading ICS for ICR because they read the
+ * same but ICS does not clear on read */
+ regs_buff[19] = E1000_READ_REG(hw, E1000_ICS);
+ regs_buff[20] = E1000_READ_REG(hw, E1000_ICS);
+ regs_buff[21] = E1000_READ_REG(hw, E1000_IMS);
+ regs_buff[22] = E1000_READ_REG(hw, E1000_IMC);
+ regs_buff[23] = E1000_READ_REG(hw, E1000_IAC);
+ regs_buff[24] = E1000_READ_REG(hw, E1000_IAM);
+ regs_buff[25] = E1000_READ_REG(hw, E1000_IMIRVP);
+
+ /* Flow Control */
+ regs_buff[26] = E1000_READ_REG(hw, E1000_FCAL);
+ regs_buff[27] = E1000_READ_REG(hw, E1000_FCAH);
+ regs_buff[28] = E1000_READ_REG(hw, E1000_FCTTV);
+ regs_buff[29] = E1000_READ_REG(hw, E1000_FCRTL);
+ regs_buff[30] = E1000_READ_REG(hw, E1000_FCRTH);
+ regs_buff[31] = E1000_READ_REG(hw, E1000_FCRTV);
+
+ /* Receive */
+ regs_buff[32] = E1000_READ_REG(hw, E1000_RCTL);
+ regs_buff[33] = E1000_READ_REG(hw, E1000_RXCSUM);
+ regs_buff[34] = E1000_READ_REG(hw, E1000_RLPML);
+ regs_buff[35] = E1000_READ_REG(hw, E1000_RFCTL);
+ regs_buff[36] = E1000_READ_REG(hw, E1000_MRQC);
+ regs_buff[37] = E1000_READ_REG(hw, E1000_VT_CTL);
+
+ /* Transmit */
+ regs_buff[38] = E1000_READ_REG(hw, E1000_TCTL);
+ regs_buff[39] = E1000_READ_REG(hw, E1000_TCTL_EXT);
+ regs_buff[40] = E1000_READ_REG(hw, E1000_TIPG);
+ regs_buff[41] = E1000_READ_REG(hw, E1000_DTXCTL);
+
+ /* Wake Up */
+ regs_buff[42] = E1000_READ_REG(hw, E1000_WUC);
+ regs_buff[43] = E1000_READ_REG(hw, E1000_WUFC);
+ regs_buff[44] = E1000_READ_REG(hw, E1000_WUS);
+ regs_buff[45] = E1000_READ_REG(hw, E1000_IPAV);
+ regs_buff[46] = E1000_READ_REG(hw, E1000_WUPL);
+
+ /* MAC */
+ regs_buff[47] = E1000_READ_REG(hw, E1000_PCS_CFG0);
+ regs_buff[48] = E1000_READ_REG(hw, E1000_PCS_LCTL);
+ regs_buff[49] = E1000_READ_REG(hw, E1000_PCS_LSTAT);
+ regs_buff[50] = E1000_READ_REG(hw, E1000_PCS_ANADV);
+ regs_buff[51] = E1000_READ_REG(hw, E1000_PCS_LPAB);
+ regs_buff[52] = E1000_READ_REG(hw, E1000_PCS_NPTX);
+ regs_buff[53] = E1000_READ_REG(hw, E1000_PCS_LPABNP);
+
+ /* Statistics */
+ regs_buff[54] = adapter->stats.crcerrs;
+ regs_buff[55] = adapter->stats.algnerrc;
+ regs_buff[56] = adapter->stats.symerrs;
+ regs_buff[57] = adapter->stats.rxerrc;
+ regs_buff[58] = adapter->stats.mpc;
+ regs_buff[59] = adapter->stats.scc;
+ regs_buff[60] = adapter->stats.ecol;
+ regs_buff[61] = adapter->stats.mcc;
+ regs_buff[62] = adapter->stats.latecol;
+ regs_buff[63] = adapter->stats.colc;
+ regs_buff[64] = adapter->stats.dc;
+ regs_buff[65] = adapter->stats.tncrs;
+ regs_buff[66] = adapter->stats.sec;
+ regs_buff[67] = adapter->stats.htdpmc;
+ regs_buff[68] = adapter->stats.rlec;
+ regs_buff[69] = adapter->stats.xonrxc;
+ regs_buff[70] = adapter->stats.xontxc;
+ regs_buff[71] = adapter->stats.xoffrxc;
+ regs_buff[72] = adapter->stats.xofftxc;
+ regs_buff[73] = adapter->stats.fcruc;
+ regs_buff[74] = adapter->stats.prc64;
+ regs_buff[75] = adapter->stats.prc127;
+ regs_buff[76] = adapter->stats.prc255;
+ regs_buff[77] = adapter->stats.prc511;
+ regs_buff[78] = adapter->stats.prc1023;
+ regs_buff[79] = adapter->stats.prc1522;
+ regs_buff[80] = adapter->stats.gprc;
+ regs_buff[81] = adapter->stats.bprc;
+ regs_buff[82] = adapter->stats.mprc;
+ regs_buff[83] = adapter->stats.gptc;
+ regs_buff[84] = adapter->stats.gorc;
+ regs_buff[86] = adapter->stats.gotc;
+ regs_buff[88] = adapter->stats.rnbc;
+ regs_buff[89] = adapter->stats.ruc;
+ regs_buff[90] = adapter->stats.rfc;
+ regs_buff[91] = adapter->stats.roc;
+ regs_buff[92] = adapter->stats.rjc;
+ regs_buff[93] = adapter->stats.mgprc;
+ regs_buff[94] = adapter->stats.mgpdc;
+ regs_buff[95] = adapter->stats.mgptc;
+ regs_buff[96] = adapter->stats.tor;
+ regs_buff[98] = adapter->stats.tot;
+ regs_buff[100] = adapter->stats.tpr;
+ regs_buff[101] = adapter->stats.tpt;
+ regs_buff[102] = adapter->stats.ptc64;
+ regs_buff[103] = adapter->stats.ptc127;
+ regs_buff[104] = adapter->stats.ptc255;
+ regs_buff[105] = adapter->stats.ptc511;
+ regs_buff[106] = adapter->stats.ptc1023;
+ regs_buff[107] = adapter->stats.ptc1522;
+ regs_buff[108] = adapter->stats.mptc;
+ regs_buff[109] = adapter->stats.bptc;
+ regs_buff[110] = adapter->stats.tsctc;
+ regs_buff[111] = adapter->stats.iac;
+ regs_buff[112] = adapter->stats.rpthc;
+ regs_buff[113] = adapter->stats.hgptc;
+ regs_buff[114] = adapter->stats.hgorc;
+ regs_buff[116] = adapter->stats.hgotc;
+ regs_buff[118] = adapter->stats.lenerrs;
+ regs_buff[119] = adapter->stats.scvpc;
+ regs_buff[120] = adapter->stats.hrmpc;
+
+ for (i = 0; i < 4; i++)
+ regs_buff[121 + i] = E1000_READ_REG(hw, E1000_SRRCTL(i));
+ for (i = 0; i < 4; i++)
+ regs_buff[125 + i] = E1000_READ_REG(hw, E1000_PSRTYPE(i));
+ for (i = 0; i < 4; i++)
+ regs_buff[129 + i] = E1000_READ_REG(hw, E1000_RDBAL(i));
+ for (i = 0; i < 4; i++)
+ regs_buff[133 + i] = E1000_READ_REG(hw, E1000_RDBAH(i));
+ for (i = 0; i < 4; i++)
+ regs_buff[137 + i] = E1000_READ_REG(hw, E1000_RDLEN(i));
+ for (i = 0; i < 4; i++)
+ regs_buff[141 + i] = E1000_READ_REG(hw, E1000_RDH(i));
+ for (i = 0; i < 4; i++)
+ regs_buff[145 + i] = E1000_READ_REG(hw, E1000_RDT(i));
+ for (i = 0; i < 4; i++)
+ regs_buff[149 + i] = E1000_READ_REG(hw, E1000_RXDCTL(i));
+
+ for (i = 0; i < 10; i++)
+ regs_buff[153 + i] = E1000_READ_REG(hw, E1000_EITR(i));
+ for (i = 0; i < 8; i++)
+ regs_buff[163 + i] = E1000_READ_REG(hw, E1000_IMIR(i));
+ for (i = 0; i < 8; i++)
+ regs_buff[171 + i] = E1000_READ_REG(hw, E1000_IMIREXT(i));
+ for (i = 0; i < 16; i++)
+ regs_buff[179 + i] = E1000_READ_REG(hw, E1000_RAL(i));
+ for (i = 0; i < 16; i++)
+ regs_buff[195 + i] = E1000_READ_REG(hw, E1000_RAH(i));
+
+ for (i = 0; i < 4; i++)
+ regs_buff[211 + i] = E1000_READ_REG(hw, E1000_TDBAL(i));
+ for (i = 0; i < 4; i++)
+ regs_buff[215 + i] = E1000_READ_REG(hw, E1000_TDBAH(i));
+ for (i = 0; i < 4; i++)
+ regs_buff[219 + i] = E1000_READ_REG(hw, E1000_TDLEN(i));
+ for (i = 0; i < 4; i++)
+ regs_buff[223 + i] = E1000_READ_REG(hw, E1000_TDH(i));
+ for (i = 0; i < 4; i++)
+ regs_buff[227 + i] = E1000_READ_REG(hw, E1000_TDT(i));
+ for (i = 0; i < 4; i++)
+ regs_buff[231 + i] = E1000_READ_REG(hw, E1000_TXDCTL(i));
+ for (i = 0; i < 4; i++)
+ regs_buff[235 + i] = E1000_READ_REG(hw, E1000_TDWBAL(i));
+ for (i = 0; i < 4; i++)
+ regs_buff[239 + i] = E1000_READ_REG(hw, E1000_TDWBAH(i));
+ for (i = 0; i < 4; i++)
+ regs_buff[243 + i] = E1000_READ_REG(hw, E1000_DCA_TXCTRL(i));
+
+ for (i = 0; i < 4; i++)
+ regs_buff[247 + i] = E1000_READ_REG(hw, E1000_IP4AT_REG(i));
+ for (i = 0; i < 4; i++)
+ regs_buff[251 + i] = E1000_READ_REG(hw, E1000_IP6AT_REG(i));
+ for (i = 0; i < 32; i++)
+ regs_buff[255 + i] = E1000_READ_REG(hw, E1000_WUPM_REG(i));
+ for (i = 0; i < 128; i++)
+ regs_buff[287 + i] = E1000_READ_REG(hw, E1000_FFMT_REG(i));
+ for (i = 0; i < 128; i++)
+ regs_buff[415 + i] = E1000_READ_REG(hw, E1000_FFVT_REG(i));
+ for (i = 0; i < 4; i++)
+ regs_buff[543 + i] = E1000_READ_REG(hw, E1000_FFLT_REG(i));
+
+ regs_buff[547] = E1000_READ_REG(hw, E1000_TDFH);
+ regs_buff[548] = E1000_READ_REG(hw, E1000_TDFT);
+ regs_buff[549] = E1000_READ_REG(hw, E1000_TDFHS);
+ regs_buff[550] = E1000_READ_REG(hw, E1000_TDFPC);
+ if (hw->mac.type > e1000_82580) {
+ regs_buff[551] = adapter->stats.o2bgptc;
+ regs_buff[552] = adapter->stats.b2ospc;
+ regs_buff[553] = adapter->stats.o2bspc;
+ regs_buff[554] = adapter->stats.b2ogprc;
+ }
+}
+
+static int igb_get_eeprom_len(struct net_device *netdev)
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+ return adapter->hw.nvm.word_size * 2;
+}
+
+static int igb_get_eeprom(struct net_device *netdev,
+ struct ethtool_eeprom *eeprom, u8 *bytes)
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+ struct e1000_hw *hw = &adapter->hw;
+ u16 *eeprom_buff;
+ int first_word, last_word;
+ int ret_val = 0;
+ u16 i;
+
+ if (eeprom->len == 0)
+ return -EINVAL;
+
+ eeprom->magic = hw->vendor_id | (hw->device_id << 16);
+
+ first_word = eeprom->offset >> 1;
+ last_word = (eeprom->offset + eeprom->len - 1) >> 1;
+
+ eeprom_buff = kmalloc(sizeof(u16) *
+ (last_word - first_word + 1), GFP_KERNEL);
+ if (!eeprom_buff)
+ return -ENOMEM;
+
+ if (hw->nvm.type == e1000_nvm_eeprom_spi)
+ ret_val = e1000_read_nvm(hw, first_word,
+ last_word - first_word + 1,
+ eeprom_buff);
+ else {
+ for (i = 0; i < last_word - first_word + 1; i++) {
+ ret_val = e1000_read_nvm(hw, first_word + i, 1,
+ &eeprom_buff[i]);
+ if (ret_val)
+ break;
+ }
+ }
+
+ /* Device's eeprom is always little-endian, word addressable */
+ for (i = 0; i < last_word - first_word + 1; i++)
+ eeprom_buff[i] = le16_to_cpu(eeprom_buff[i]);
+
+ memcpy(bytes, (u8 *)eeprom_buff + (eeprom->offset & 1),
+ eeprom->len);
+ kfree(eeprom_buff);
+
+ return ret_val;
+}
+
+static int igb_set_eeprom(struct net_device *netdev,
+ struct ethtool_eeprom *eeprom, u8 *bytes)
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+ struct e1000_hw *hw = &adapter->hw;
+ u16 *eeprom_buff;
+ void *ptr;
+ int max_len, first_word, last_word, ret_val = 0;
+ u16 i;
+
+ if (eeprom->len == 0)
+ return -EOPNOTSUPP;
+
+ if (eeprom->magic != (hw->vendor_id | (hw->device_id << 16)))
+ return -EFAULT;
+
+ max_len = hw->nvm.word_size * 2;
+
+ first_word = eeprom->offset >> 1;
+ last_word = (eeprom->offset + eeprom->len - 1) >> 1;
+ eeprom_buff = kmalloc(max_len, GFP_KERNEL);
+ if (!eeprom_buff)
+ return -ENOMEM;
+
+ ptr = (void *)eeprom_buff;
+
+ if (eeprom->offset & 1) {
+ /* need read/modify/write of first changed EEPROM word */
+ /* only the second byte of the word is being modified */
+ ret_val = e1000_read_nvm(hw, first_word, 1,
+ &eeprom_buff[0]);
+ ptr++;
+ }
+ if (((eeprom->offset + eeprom->len) & 1) && (ret_val == 0)) {
+ /* need read/modify/write of last changed EEPROM word */
+ /* only the first byte of the word is being modified */
+ ret_val = e1000_read_nvm(hw, last_word, 1,
+ &eeprom_buff[last_word - first_word]);
+ }
+
+ /* Device's eeprom is always little-endian, word addressable */
+ for (i = 0; i < last_word - first_word + 1; i++)
+ le16_to_cpus(&eeprom_buff[i]);
+
+ memcpy(ptr, bytes, eeprom->len);
+
+ for (i = 0; i < last_word - first_word + 1; i++)
+ cpu_to_le16s(&eeprom_buff[i]);
+
+ ret_val = e1000_write_nvm(hw, first_word,
+ last_word - first_word + 1, eeprom_buff);
+
+ /* Update the checksum if write succeeded.
+ * and flush shadow RAM for 82573 controllers */
+ if (ret_val == 0)
+ e1000_update_nvm_checksum(hw);
+
+ kfree(eeprom_buff);
+ return ret_val;
+}
+
+static void igb_get_drvinfo(struct net_device *netdev,
+ struct ethtool_drvinfo *drvinfo)
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+
+ strncpy(drvinfo->driver, igb_driver_name, sizeof(drvinfo->driver) - 1);
+ strncpy(drvinfo->version, igb_driver_version, sizeof(drvinfo->version) - 1);
+
+ strncpy(drvinfo->fw_version, adapter->fw_version,
+ sizeof(drvinfo->fw_version) - 1);
+ strncpy(drvinfo->bus_info, pci_name(adapter->pdev), sizeof(drvinfo->bus_info) -1);
+ drvinfo->n_stats = IGB_STATS_LEN;
+ drvinfo->testinfo_len = IGB_TEST_LEN;
+ drvinfo->regdump_len = igb_get_regs_len(netdev);
+ drvinfo->eedump_len = igb_get_eeprom_len(netdev);
+}
+
+static void igb_get_ringparam(struct net_device *netdev,
+ struct ethtool_ringparam *ring)
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+
+ ring->rx_max_pending = IGB_MAX_RXD;
+ ring->tx_max_pending = IGB_MAX_TXD;
+ ring->rx_mini_max_pending = 0;
+ ring->rx_jumbo_max_pending = 0;
+ ring->rx_pending = adapter->rx_ring_count;
+ ring->tx_pending = adapter->tx_ring_count;
+ ring->rx_mini_pending = 0;
+ ring->rx_jumbo_pending = 0;
+}
+
+static int igb_set_ringparam(struct net_device *netdev,
+ struct ethtool_ringparam *ring)
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+ struct igb_ring *temp_ring;
+ int i, err = 0;
+ u16 new_rx_count, new_tx_count;
+
+ if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending))
+ return -EINVAL;
+
+ new_rx_count = min(ring->rx_pending, (u32)IGB_MAX_RXD);
+ new_rx_count = max(new_rx_count, (u16)IGB_MIN_RXD);
+ new_rx_count = ALIGN(new_rx_count, REQ_RX_DESCRIPTOR_MULTIPLE);
+
+ new_tx_count = min(ring->tx_pending, (u32)IGB_MAX_TXD);
+ new_tx_count = max(new_tx_count, (u16)IGB_MIN_TXD);
+ new_tx_count = ALIGN(new_tx_count, REQ_TX_DESCRIPTOR_MULTIPLE);
+
+ if ((new_tx_count == adapter->tx_ring_count) &&
+ (new_rx_count == adapter->rx_ring_count)) {
+ /* nothing to do */
+ return 0;
+ }
+
+ while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
+ usleep_range(1000, 2000);
+
+ if (!netif_running(adapter->netdev)) {
+ for (i = 0; i < adapter->num_tx_queues; i++)
+ adapter->tx_ring[i]->count = new_tx_count;
+ for (i = 0; i < adapter->num_rx_queues; i++)
+ adapter->rx_ring[i]->count = new_rx_count;
+ adapter->tx_ring_count = new_tx_count;
+ adapter->rx_ring_count = new_rx_count;
+ goto clear_reset;
+ }
+
+ if (adapter->num_tx_queues > adapter->num_rx_queues)
+ temp_ring = vmalloc(adapter->num_tx_queues * sizeof(struct igb_ring));
+ else
+ temp_ring = vmalloc(adapter->num_rx_queues * sizeof(struct igb_ring));
+
+ if (!temp_ring) {
+ err = -ENOMEM;
+ goto clear_reset;
+ }
+
+ igb_down(adapter);
+
+ /*
+ * We can't just free everything and then setup again,
+ * because the ISRs in MSI-X mode get passed pointers
+ * to the tx and rx ring structs.
+ */
+ if (new_tx_count != adapter->tx_ring_count) {
+ for (i = 0; i < adapter->num_tx_queues; i++) {
+ memcpy(&temp_ring[i], adapter->tx_ring[i],
+ sizeof(struct igb_ring));
+
+ temp_ring[i].count = new_tx_count;
+ err = igb_setup_tx_resources(&temp_ring[i]);
+ if (err) {
+ while (i) {
+ i--;
+ igb_free_tx_resources(&temp_ring[i]);
+ }
+ goto err_setup;
+ }
+ }
+
+ for (i = 0; i < adapter->num_tx_queues; i++) {
+ igb_free_tx_resources(adapter->tx_ring[i]);
+
+ memcpy(adapter->tx_ring[i], &temp_ring[i],
+ sizeof(struct igb_ring));
+ }
+
+ adapter->tx_ring_count = new_tx_count;
+ }
+
+ if (new_rx_count != adapter->rx_ring_count) {
+ for (i = 0; i < adapter->num_rx_queues; i++) {
+ memcpy(&temp_ring[i], adapter->rx_ring[i],
+ sizeof(struct igb_ring));
+
+ temp_ring[i].count = new_rx_count;
+ err = igb_setup_rx_resources(&temp_ring[i]);
+ if (err) {
+ while (i) {
+ i--;
+ igb_free_rx_resources(&temp_ring[i]);
+ }
+ goto err_setup;
+ }
+
+ }
+
+ for (i = 0; i < adapter->num_rx_queues; i++) {
+ igb_free_rx_resources(adapter->rx_ring[i]);
+
+ memcpy(adapter->rx_ring[i], &temp_ring[i],
+ sizeof(struct igb_ring));
+ }
+
+ adapter->rx_ring_count = new_rx_count;
+ }
+err_setup:
+ igb_up(adapter);
+ vfree(temp_ring);
+clear_reset:
+ clear_bit(__IGB_RESETTING, &adapter->state);
+ return err;
+}
+static bool reg_pattern_test(struct igb_adapter *adapter, u64 *data,
+ int reg, u32 mask, u32 write)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ u32 pat, val;
+ static const u32 _test[] =
+ {0x5A5A5A5A, 0xA5A5A5A5, 0x00000000, 0xFFFFFFFF};
+ for (pat = 0; pat < ARRAY_SIZE(_test); pat++) {
+ E1000_WRITE_REG(hw, reg, (_test[pat] & write));
+ val = E1000_READ_REG(hw, reg) & mask;
+ if (val != (_test[pat] & write & mask)) {
+ dev_err(pci_dev_to_dev(adapter->pdev), "pattern test reg %04X "
+ "failed: got 0x%08X expected 0x%08X\n",
+ E1000_REGISTER(hw, reg), val, (_test[pat] & write & mask));
+ *data = E1000_REGISTER(hw, reg);
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static bool reg_set_and_check(struct igb_adapter *adapter, u64 *data,
+ int reg, u32 mask, u32 write)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ u32 val;
+ E1000_WRITE_REG(hw, reg, write & mask);
+ val = E1000_READ_REG(hw, reg);
+ if ((write & mask) != (val & mask)) {
+ dev_err(pci_dev_to_dev(adapter->pdev), "set/check reg %04X test failed:"
+ " got 0x%08X expected 0x%08X\n", reg,
+ (val & mask), (write & mask));
+ *data = E1000_REGISTER(hw, reg);
+ return 1;
+ }
+
+ return 0;
+}
+
+#define REG_PATTERN_TEST(reg, mask, write) \
+ do { \
+ if (reg_pattern_test(adapter, data, reg, mask, write)) \
+ return 1; \
+ } while (0)
+
+#define REG_SET_AND_CHECK(reg, mask, write) \
+ do { \
+ if (reg_set_and_check(adapter, data, reg, mask, write)) \
+ return 1; \
+ } while (0)
+
+static int igb_reg_test(struct igb_adapter *adapter, u64 *data)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ struct igb_reg_test *test;
+ u32 value, before, after;
+ u32 i, toggle;
+
+ switch (adapter->hw.mac.type) {
+ case e1000_i350:
+ case e1000_i354:
+ test = reg_test_i350;
+ toggle = 0x7FEFF3FF;
+ break;
+ case e1000_i210:
+ case e1000_i211:
+ test = reg_test_i210;
+ toggle = 0x7FEFF3FF;
+ break;
+ case e1000_82580:
+ test = reg_test_82580;
+ toggle = 0x7FEFF3FF;
+ break;
+ case e1000_82576:
+ test = reg_test_82576;
+ toggle = 0x7FFFF3FF;
+ break;
+ default:
+ test = reg_test_82575;
+ toggle = 0x7FFFF3FF;
+ break;
+ }
+
+ /* Because the status register is such a special case,
+ * we handle it separately from the rest of the register
+ * tests. Some bits are read-only, some toggle, and some
+ * are writable on newer MACs.
+ */
+ before = E1000_READ_REG(hw, E1000_STATUS);
+ value = (E1000_READ_REG(hw, E1000_STATUS) & toggle);
+ E1000_WRITE_REG(hw, E1000_STATUS, toggle);
+ after = E1000_READ_REG(hw, E1000_STATUS) & toggle;
+ if (value != after) {
+ dev_err(pci_dev_to_dev(adapter->pdev), "failed STATUS register test "
+ "got: 0x%08X expected: 0x%08X\n", after, value);
+ *data = 1;
+ return 1;
+ }
+ /* restore previous status */
+ E1000_WRITE_REG(hw, E1000_STATUS, before);
+
+ /* Perform the remainder of the register test, looping through
+ * the test table until we either fail or reach the null entry.
+ */
+ while (test->reg) {
+ for (i = 0; i < test->array_len; i++) {
+ switch (test->test_type) {
+ case PATTERN_TEST:
+ REG_PATTERN_TEST(test->reg +
+ (i * test->reg_offset),
+ test->mask,
+ test->write);
+ break;
+ case SET_READ_TEST:
+ REG_SET_AND_CHECK(test->reg +
+ (i * test->reg_offset),
+ test->mask,
+ test->write);
+ break;
+ case WRITE_NO_TEST:
+ writel(test->write,
+ (adapter->hw.hw_addr + test->reg)
+ + (i * test->reg_offset));
+ break;
+ case TABLE32_TEST:
+ REG_PATTERN_TEST(test->reg + (i * 4),
+ test->mask,
+ test->write);
+ break;
+ case TABLE64_TEST_LO:
+ REG_PATTERN_TEST(test->reg + (i * 8),
+ test->mask,
+ test->write);
+ break;
+ case TABLE64_TEST_HI:
+ REG_PATTERN_TEST((test->reg + 4) + (i * 8),
+ test->mask,
+ test->write);
+ break;
+ }
+ }
+ test++;
+ }
+
+ *data = 0;
+ return 0;
+}
+
+static int igb_eeprom_test(struct igb_adapter *adapter, u64 *data)
+{
+ *data = 0;
+
+ /* Validate NVM checksum */
+ if (e1000_validate_nvm_checksum(&adapter->hw) < 0)
+ *data = 2;
+
+ return *data;
+}
+
+static irqreturn_t igb_test_intr(int irq, void *data)
+{
+ struct igb_adapter *adapter = (struct igb_adapter *) data;
+ struct e1000_hw *hw = &adapter->hw;
+
+ adapter->test_icr |= E1000_READ_REG(hw, E1000_ICR);
+
+ return IRQ_HANDLED;
+}
+
+static int igb_intr_test(struct igb_adapter *adapter, u64 *data)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ struct net_device *netdev = adapter->netdev;
+ u32 mask, ics_mask, i = 0, shared_int = TRUE;
+ u32 irq = adapter->pdev->irq;
+
+ *data = 0;
+
+ /* Hook up test interrupt handler just for this test */
+ if (adapter->msix_entries) {
+ if (request_irq(adapter->msix_entries[0].vector,
+ &igb_test_intr, 0, netdev->name, adapter)) {
+ *data = 1;
+ return -1;
+ }
+ } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
+ shared_int = FALSE;
+ if (request_irq(irq,
+ igb_test_intr, 0, netdev->name, adapter)) {
+ *data = 1;
+ return -1;
+ }
+ } else if (!request_irq(irq, igb_test_intr, IRQF_PROBE_SHARED,
+ netdev->name, adapter)) {
+ shared_int = FALSE;
+ } else if (request_irq(irq, &igb_test_intr, IRQF_SHARED,
+ netdev->name, adapter)) {
+ *data = 1;
+ return -1;
+ }
+ dev_info(pci_dev_to_dev(adapter->pdev), "testing %s interrupt\n",
+ (shared_int ? "shared" : "unshared"));
+
+ /* Disable all the interrupts */
+ E1000_WRITE_REG(hw, E1000_IMC, ~0);
+ E1000_WRITE_FLUSH(hw);
+ usleep_range(10000, 20000);
+
+ /* Define all writable bits for ICS */
+ switch (hw->mac.type) {
+ case e1000_82575:
+ ics_mask = 0x37F47EDD;
+ break;
+ case e1000_82576:
+ ics_mask = 0x77D4FBFD;
+ break;
+ case e1000_82580:
+ ics_mask = 0x77DCFED5;
+ break;
+ case e1000_i350:
+ case e1000_i354:
+ ics_mask = 0x77DCFED5;
+ break;
+ case e1000_i210:
+ case e1000_i211:
+ ics_mask = 0x774CFED5;
+ break;
+ default:
+ ics_mask = 0x7FFFFFFF;
+ break;
+ }
+
+ /* Test each interrupt */
+ for (; i < 31; i++) {
+ /* Interrupt to test */
+ mask = 1 << i;
+
+ if (!(mask & ics_mask))
+ continue;
+
+ if (!shared_int) {
+ /* Disable the interrupt to be reported in
+ * the cause register and then force the same
+ * interrupt and see if one gets posted. If
+ * an interrupt was posted to the bus, the
+ * test failed.
+ */
+ adapter->test_icr = 0;
+
+ /* Flush any pending interrupts */
+ E1000_WRITE_REG(hw, E1000_ICR, ~0);
+
+ E1000_WRITE_REG(hw, E1000_IMC, mask);
+ E1000_WRITE_REG(hw, E1000_ICS, mask);
+ E1000_WRITE_FLUSH(hw);
+ usleep_range(10000, 20000);
+
+ if (adapter->test_icr & mask) {
+ *data = 3;
+ break;
+ }
+ }
+
+ /* Enable the interrupt to be reported in
+ * the cause register and then force the same
+ * interrupt and see if one gets posted. If
+ * an interrupt was not posted to the bus, the
+ * test failed.
+ */
+ adapter->test_icr = 0;
+
+ /* Flush any pending interrupts */
+ E1000_WRITE_REG(hw, E1000_ICR, ~0);
+
+ E1000_WRITE_REG(hw, E1000_IMS, mask);
+ E1000_WRITE_REG(hw, E1000_ICS, mask);
+ E1000_WRITE_FLUSH(hw);
+ usleep_range(10000, 20000);
+
+ if (!(adapter->test_icr & mask)) {
+ *data = 4;
+ break;
+ }
+
+ if (!shared_int) {
+ /* Disable the other interrupts to be reported in
+ * the cause register and then force the other
+ * interrupts and see if any get posted. If
+ * an interrupt was posted to the bus, the
+ * test failed.
+ */
+ adapter->test_icr = 0;
+
+ /* Flush any pending interrupts */
+ E1000_WRITE_REG(hw, E1000_ICR, ~0);
+
+ E1000_WRITE_REG(hw, E1000_IMC, ~mask);
+ E1000_WRITE_REG(hw, E1000_ICS, ~mask);
+ E1000_WRITE_FLUSH(hw);
+ usleep_range(10000, 20000);
+
+ if (adapter->test_icr & mask) {
+ *data = 5;
+ break;
+ }
+ }
+ }
+
+ /* Disable all the interrupts */
+ E1000_WRITE_REG(hw, E1000_IMC, ~0);
+ E1000_WRITE_FLUSH(hw);
+ usleep_range(10000, 20000);
+
+ /* Unhook test interrupt handler */
+ if (adapter->msix_entries)
+ free_irq(adapter->msix_entries[0].vector, adapter);
+ else
+ free_irq(irq, adapter);
+
+ return *data;
+}
+
+static void igb_free_desc_rings(struct igb_adapter *adapter)
+{
+ igb_free_tx_resources(&adapter->test_tx_ring);
+ igb_free_rx_resources(&adapter->test_rx_ring);
+}
+
+static int igb_setup_desc_rings(struct igb_adapter *adapter)
+{
+ struct igb_ring *tx_ring = &adapter->test_tx_ring;
+ struct igb_ring *rx_ring = &adapter->test_rx_ring;
+ struct e1000_hw *hw = &adapter->hw;
+ int ret_val;
+
+ /* Setup Tx descriptor ring and Tx buffers */
+ tx_ring->count = IGB_DEFAULT_TXD;
+ tx_ring->dev = pci_dev_to_dev(adapter->pdev);
+ tx_ring->netdev = adapter->netdev;
+ tx_ring->reg_idx = adapter->vfs_allocated_count;
+
+ if (igb_setup_tx_resources(tx_ring)) {
+ ret_val = 1;
+ goto err_nomem;
+ }
+
+ igb_setup_tctl(adapter);
+ igb_configure_tx_ring(adapter, tx_ring);
+
+ /* Setup Rx descriptor ring and Rx buffers */
+ rx_ring->count = IGB_DEFAULT_RXD;
+ rx_ring->dev = pci_dev_to_dev(adapter->pdev);
+ rx_ring->netdev = adapter->netdev;
+#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
+ rx_ring->rx_buffer_len = IGB_RX_HDR_LEN;
+#endif
+ rx_ring->reg_idx = adapter->vfs_allocated_count;
+
+ if (igb_setup_rx_resources(rx_ring)) {
+ ret_val = 2;
+ goto err_nomem;
+ }
+
+ /* set the default queue to queue 0 of PF */
+ E1000_WRITE_REG(hw, E1000_MRQC, adapter->vfs_allocated_count << 3);
+
+ /* enable receive ring */
+ igb_setup_rctl(adapter);
+ igb_configure_rx_ring(adapter, rx_ring);
+
+ igb_alloc_rx_buffers(rx_ring, igb_desc_unused(rx_ring));
+
+ return 0;
+
+err_nomem:
+ igb_free_desc_rings(adapter);
+ return ret_val;
+}
+
+static void igb_phy_disable_receiver(struct igb_adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+
+ /* Write out to PHY registers 29 and 30 to disable the Receiver. */
+ e1000_write_phy_reg(hw, 29, 0x001F);
+ e1000_write_phy_reg(hw, 30, 0x8FFC);
+ e1000_write_phy_reg(hw, 29, 0x001A);
+ e1000_write_phy_reg(hw, 30, 0x8FF0);
+}
+
+static int igb_integrated_phy_loopback(struct igb_adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ u32 ctrl_reg = 0;
+
+ hw->mac.autoneg = FALSE;
+
+ if (hw->phy.type == e1000_phy_m88) {
+ if (hw->phy.id != I210_I_PHY_ID) {
+ /* Auto-MDI/MDIX Off */
+ e1000_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, 0x0808);
+ /* reset to update Auto-MDI/MDIX */
+ e1000_write_phy_reg(hw, PHY_CONTROL, 0x9140);
+ /* autoneg off */
+ e1000_write_phy_reg(hw, PHY_CONTROL, 0x8140);
+ } else {
+ /* force 1000, set loopback */
+ e1000_write_phy_reg(hw, I347AT4_PAGE_SELECT, 0);
+ e1000_write_phy_reg(hw, PHY_CONTROL, 0x4140);
+ }
+ } else {
+ /* enable MII loopback */
+ if (hw->phy.type == e1000_phy_82580)
+ e1000_write_phy_reg(hw, I82577_PHY_LBK_CTRL, 0x8041);
+ }
+
+ /* force 1000, set loopback */
+ e1000_write_phy_reg(hw, PHY_CONTROL, 0x4140);
+
+ /* Now set up the MAC to the same speed/duplex as the PHY. */
+ ctrl_reg = E1000_READ_REG(hw, E1000_CTRL);
+ ctrl_reg &= ~E1000_CTRL_SPD_SEL; /* Clear the speed sel bits */
+ ctrl_reg |= (E1000_CTRL_FRCSPD | /* Set the Force Speed Bit */
+ E1000_CTRL_FRCDPX | /* Set the Force Duplex Bit */
+ E1000_CTRL_SPD_1000 |/* Force Speed to 1000 */
+ E1000_CTRL_FD | /* Force Duplex to FULL */
+ E1000_CTRL_SLU); /* Set link up enable bit */
+
+ if (hw->phy.type == e1000_phy_m88)
+ ctrl_reg |= E1000_CTRL_ILOS; /* Invert Loss of Signal */
+
+ E1000_WRITE_REG(hw, E1000_CTRL, ctrl_reg);
+
+ /* Disable the receiver on the PHY so when a cable is plugged in, the
+ * PHY does not begin to autoneg when a cable is reconnected to the NIC.
+ */
+ if (hw->phy.type == e1000_phy_m88)
+ igb_phy_disable_receiver(adapter);
+
+ mdelay(500);
+ return 0;
+}
+
+static int igb_set_phy_loopback(struct igb_adapter *adapter)
+{
+ return igb_integrated_phy_loopback(adapter);
+}
+
+static int igb_setup_loopback_test(struct igb_adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ u32 reg;
+
+ reg = E1000_READ_REG(hw, E1000_CTRL_EXT);
+
+ /* use CTRL_EXT to identify link type as SGMII can appear as copper */
+ if (reg & E1000_CTRL_EXT_LINK_MODE_MASK) {
+ if ((hw->device_id == E1000_DEV_ID_DH89XXCC_SGMII) ||
+ (hw->device_id == E1000_DEV_ID_DH89XXCC_SERDES) ||
+ (hw->device_id == E1000_DEV_ID_DH89XXCC_BACKPLANE) ||
+ (hw->device_id == E1000_DEV_ID_DH89XXCC_SFP)) {
+
+ /* Enable DH89xxCC MPHY for near end loopback */
+ reg = E1000_READ_REG(hw, E1000_MPHY_ADDR_CTL);
+ reg = (reg & E1000_MPHY_ADDR_CTL_OFFSET_MASK) |
+ E1000_MPHY_PCS_CLK_REG_OFFSET;
+ E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTL, reg);
+
+ reg = E1000_READ_REG(hw, E1000_MPHY_DATA);
+ reg |= E1000_MPHY_PCS_CLK_REG_DIGINELBEN;
+ E1000_WRITE_REG(hw, E1000_MPHY_DATA, reg);
+ }
+
+ reg = E1000_READ_REG(hw, E1000_RCTL);
+ reg |= E1000_RCTL_LBM_TCVR;
+ E1000_WRITE_REG(hw, E1000_RCTL, reg);
+
+ E1000_WRITE_REG(hw, E1000_SCTL, E1000_ENABLE_SERDES_LOOPBACK);
+
+ reg = E1000_READ_REG(hw, E1000_CTRL);
+ reg &= ~(E1000_CTRL_RFCE |
+ E1000_CTRL_TFCE |
+ E1000_CTRL_LRST);
+ reg |= E1000_CTRL_SLU |
+ E1000_CTRL_FD;
+ E1000_WRITE_REG(hw, E1000_CTRL, reg);
+
+ /* Unset switch control to serdes energy detect */
+ reg = E1000_READ_REG(hw, E1000_CONNSW);
+ reg &= ~E1000_CONNSW_ENRGSRC;
+ E1000_WRITE_REG(hw, E1000_CONNSW, reg);
+
+ /* Unset sigdetect for SERDES loopback on
+ * 82580 and newer devices
+ */
+ if (hw->mac.type >= e1000_82580) {
+ reg = E1000_READ_REG(hw, E1000_PCS_CFG0);
+ reg |= E1000_PCS_CFG_IGN_SD;
+ E1000_WRITE_REG(hw, E1000_PCS_CFG0, reg);
+ }
+
+ /* Set PCS register for forced speed */
+ reg = E1000_READ_REG(hw, E1000_PCS_LCTL);
+ reg &= ~E1000_PCS_LCTL_AN_ENABLE; /* Disable Autoneg*/
+ reg |= E1000_PCS_LCTL_FLV_LINK_UP | /* Force link up */
+ E1000_PCS_LCTL_FSV_1000 | /* Force 1000 */
+ E1000_PCS_LCTL_FDV_FULL | /* SerDes Full duplex */
+ E1000_PCS_LCTL_FSD | /* Force Speed */
+ E1000_PCS_LCTL_FORCE_LINK; /* Force Link */
+ E1000_WRITE_REG(hw, E1000_PCS_LCTL, reg);
+
+ return 0;
+ }
+
+ return igb_set_phy_loopback(adapter);
+}
+
+static void igb_loopback_cleanup(struct igb_adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ u32 rctl;
+ u16 phy_reg;
+
+ if ((hw->device_id == E1000_DEV_ID_DH89XXCC_SGMII) ||
+ (hw->device_id == E1000_DEV_ID_DH89XXCC_SERDES) ||
+ (hw->device_id == E1000_DEV_ID_DH89XXCC_BACKPLANE) ||
+ (hw->device_id == E1000_DEV_ID_DH89XXCC_SFP)) {
+ u32 reg;
+
+ /* Disable near end loopback on DH89xxCC */
+ reg = E1000_READ_REG(hw, E1000_MPHY_ADDR_CTL);
+ reg = (reg & E1000_MPHY_ADDR_CTL_OFFSET_MASK ) |
+ E1000_MPHY_PCS_CLK_REG_OFFSET;
+ E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTL, reg);
+
+ reg = E1000_READ_REG(hw, E1000_MPHY_DATA);
+ reg &= ~E1000_MPHY_PCS_CLK_REG_DIGINELBEN;
+ E1000_WRITE_REG(hw, E1000_MPHY_DATA, reg);
+ }
+
+ rctl = E1000_READ_REG(hw, E1000_RCTL);
+ rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
+ E1000_WRITE_REG(hw, E1000_RCTL, rctl);
+
+ hw->mac.autoneg = TRUE;
+ e1000_read_phy_reg(hw, PHY_CONTROL, &phy_reg);
+ if (phy_reg & MII_CR_LOOPBACK) {
+ phy_reg &= ~MII_CR_LOOPBACK;
+ if (hw->phy.type == I210_I_PHY_ID)
+ e1000_write_phy_reg(hw, I347AT4_PAGE_SELECT, 0);
+ e1000_write_phy_reg(hw, PHY_CONTROL, phy_reg);
+ e1000_phy_commit(hw);
+ }
+}
+static void igb_create_lbtest_frame(struct sk_buff *skb,
+ unsigned int frame_size)
+{
+ memset(skb->data, 0xFF, frame_size);
+ frame_size /= 2;
+ memset(&skb->data[frame_size], 0xAA, frame_size - 1);
+ memset(&skb->data[frame_size + 10], 0xBE, 1);
+ memset(&skb->data[frame_size + 12], 0xAF, 1);
+}
+
+static int igb_check_lbtest_frame(struct igb_rx_buffer *rx_buffer,
+ unsigned int frame_size)
+{
+ unsigned char *data;
+ bool match = true;
+
+ frame_size >>= 1;
+
+#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
+ data = rx_buffer->skb->data;
+#else
+ data = kmap(rx_buffer->page);
+#endif
+
+ if (data[3] != 0xFF ||
+ data[frame_size + 10] != 0xBE ||
+ data[frame_size + 12] != 0xAF)
+ match = false;
+
+#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
+ kunmap(rx_buffer->page);
+
+#endif
+ return match;
+}
+
+static u16 igb_clean_test_rings(struct igb_ring *rx_ring,
+ struct igb_ring *tx_ring,
+ unsigned int size)
+{
+ union e1000_adv_rx_desc *rx_desc;
+ struct igb_rx_buffer *rx_buffer_info;
+ struct igb_tx_buffer *tx_buffer_info;
+ u16 rx_ntc, tx_ntc, count = 0;
+
+ /* initialize next to clean and descriptor values */
+ rx_ntc = rx_ring->next_to_clean;
+ tx_ntc = tx_ring->next_to_clean;
+ rx_desc = IGB_RX_DESC(rx_ring, rx_ntc);
+
+ while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) {
+ /* check rx buffer */
+ rx_buffer_info = &rx_ring->rx_buffer_info[rx_ntc];
+
+ /* sync Rx buffer for CPU read */
+ dma_sync_single_for_cpu(rx_ring->dev,
+ rx_buffer_info->dma,
+#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
+ IGB_RX_HDR_LEN,
+#else
+ IGB_RX_BUFSZ,
+#endif
+ DMA_FROM_DEVICE);
+
+ /* verify contents of skb */
+ if (igb_check_lbtest_frame(rx_buffer_info, size))
+ count++;
+
+ /* sync Rx buffer for device write */
+ dma_sync_single_for_device(rx_ring->dev,
+ rx_buffer_info->dma,
+#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
+ IGB_RX_HDR_LEN,
+#else
+ IGB_RX_BUFSZ,
+#endif
+ DMA_FROM_DEVICE);
+
+ /* unmap buffer on tx side */
+ tx_buffer_info = &tx_ring->tx_buffer_info[tx_ntc];
+ igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
+
+ /* increment rx/tx next to clean counters */
+ rx_ntc++;
+ if (rx_ntc == rx_ring->count)
+ rx_ntc = 0;
+ tx_ntc++;
+ if (tx_ntc == tx_ring->count)
+ tx_ntc = 0;
+
+ /* fetch next descriptor */
+ rx_desc = IGB_RX_DESC(rx_ring, rx_ntc);
+ }
+
+ /* re-map buffers to ring, store next to clean values */
+ igb_alloc_rx_buffers(rx_ring, count);
+ rx_ring->next_to_clean = rx_ntc;
+ tx_ring->next_to_clean = tx_ntc;
+
+ return count;
+}
+
+static int igb_run_loopback_test(struct igb_adapter *adapter)
+{
+ struct igb_ring *tx_ring = &adapter->test_tx_ring;
+ struct igb_ring *rx_ring = &adapter->test_rx_ring;
+ u16 i, j, lc, good_cnt;
+ int ret_val = 0;
+ unsigned int size = IGB_RX_HDR_LEN;
+ netdev_tx_t tx_ret_val;
+ struct sk_buff *skb;
+
+ /* allocate test skb */
+ skb = alloc_skb(size, GFP_KERNEL);
+ if (!skb)
+ return 11;
+
+ /* place data into test skb */
+ igb_create_lbtest_frame(skb, size);
+ skb_put(skb, size);
+
+ /*
+ * Calculate the loop count based on the largest descriptor ring
+ * The idea is to wrap the largest ring a number of times using 64
+ * send/receive pairs during each loop
+ */
+
+ if (rx_ring->count <= tx_ring->count)
+ lc = ((tx_ring->count / 64) * 2) + 1;
+ else
+ lc = ((rx_ring->count / 64) * 2) + 1;
+
+ for (j = 0; j <= lc; j++) { /* loop count loop */
+ /* reset count of good packets */
+ good_cnt = 0;
+
+ /* place 64 packets on the transmit queue*/
+ for (i = 0; i < 64; i++) {
+ skb_get(skb);
+ tx_ret_val = igb_xmit_frame_ring(skb, tx_ring);
+ if (tx_ret_val == NETDEV_TX_OK)
+ good_cnt++;
+ }
+
+ if (good_cnt != 64) {
+ ret_val = 12;
+ break;
+ }
+
+ /* allow 200 milliseconds for packets to go from tx to rx */
+ msleep(200);
+
+ good_cnt = igb_clean_test_rings(rx_ring, tx_ring, size);
+ if (good_cnt != 64) {
+ ret_val = 13;
+ break;
+ }
+ } /* end loop count loop */
+
+ /* free the original skb */
+ kfree_skb(skb);
+
+ return ret_val;
+}
+
+static int igb_loopback_test(struct igb_adapter *adapter, u64 *data)
+{
+ /* PHY loopback cannot be performed if SoL/IDER
+ * sessions are active */
+ if (e1000_check_reset_block(&adapter->hw)) {
+ dev_err(pci_dev_to_dev(adapter->pdev),
+ "Cannot do PHY loopback test "
+ "when SoL/IDER is active.\n");
+ *data = 0;
+ goto out;
+ }
+ if (adapter->hw.mac.type == e1000_i354) {
+ dev_info(&adapter->pdev->dev,
+ "Loopback test not supported on i354.\n");
+ *data = 0;
+ goto out;
+ }
+ *data = igb_setup_desc_rings(adapter);
+ if (*data)
+ goto out;
+ *data = igb_setup_loopback_test(adapter);
+ if (*data)
+ goto err_loopback;
+ *data = igb_run_loopback_test(adapter);
+
+ igb_loopback_cleanup(adapter);
+
+err_loopback:
+ igb_free_desc_rings(adapter);
+out:
+ return *data;
+}
+
+static int igb_link_test(struct igb_adapter *adapter, u64 *data)
+{
+ u32 link;
+ int i, time;
+
+ *data = 0;
+ time = 0;
+ if (adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
+ int i = 0;
+ adapter->hw.mac.serdes_has_link = FALSE;
+
+ /* On some blade server designs, link establishment
+ * could take as long as 2-3 minutes */
+ do {
+ e1000_check_for_link(&adapter->hw);
+ if (adapter->hw.mac.serdes_has_link)
+ goto out;
+ msleep(20);
+ } while (i++ < 3750);
+
+ *data = 1;
+ } else {
+ for (i=0; i < IGB_MAX_LINK_TRIES; i++) {
+ link = igb_has_link(adapter);
+ if (link)
+ goto out;
+ else {
+ time++;
+ msleep(1000);
+ }
+ }
+ if (!link)
+ *data = 1;
+ }
+ out:
+ return *data;
+}
+
+static void igb_diag_test(struct net_device *netdev,
+ struct ethtool_test *eth_test, u64 *data)
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+ u16 autoneg_advertised;
+ u8 forced_speed_duplex, autoneg;
+ bool if_running = netif_running(netdev);
+
+ set_bit(__IGB_TESTING, &adapter->state);
+ if (eth_test->flags == ETH_TEST_FL_OFFLINE) {
+ /* Offline tests */
+
+ /* save speed, duplex, autoneg settings */
+ autoneg_advertised = adapter->hw.phy.autoneg_advertised;
+ forced_speed_duplex = adapter->hw.mac.forced_speed_duplex;
+ autoneg = adapter->hw.mac.autoneg;
+
+ dev_info(pci_dev_to_dev(adapter->pdev), "offline testing starting\n");
+
+ /* power up link for link test */
+ igb_power_up_link(adapter);
+
+ /* Link test performed before hardware reset so autoneg doesn't
+ * interfere with test result */
+ if (igb_link_test(adapter, &data[4]))
+ eth_test->flags |= ETH_TEST_FL_FAILED;
+
+ if (if_running)
+ /* indicate we're in test mode */
+ dev_close(netdev);
+ else
+ igb_reset(adapter);
+
+ if (igb_reg_test(adapter, &data[0]))
+ eth_test->flags |= ETH_TEST_FL_FAILED;
+
+ igb_reset(adapter);
+ if (igb_eeprom_test(adapter, &data[1]))
+ eth_test->flags |= ETH_TEST_FL_FAILED;
+
+ igb_reset(adapter);
+ if (igb_intr_test(adapter, &data[2]))
+ eth_test->flags |= ETH_TEST_FL_FAILED;
+
+ igb_reset(adapter);
+
+ /* power up link for loopback test */
+ igb_power_up_link(adapter);
+
+ if (igb_loopback_test(adapter, &data[3]))
+ eth_test->flags |= ETH_TEST_FL_FAILED;
+
+ /* restore speed, duplex, autoneg settings */
+ adapter->hw.phy.autoneg_advertised = autoneg_advertised;
+ adapter->hw.mac.forced_speed_duplex = forced_speed_duplex;
+ adapter->hw.mac.autoneg = autoneg;
+
+ /* force this routine to wait until autoneg complete/timeout */
+ adapter->hw.phy.autoneg_wait_to_complete = TRUE;
+ igb_reset(adapter);
+ adapter->hw.phy.autoneg_wait_to_complete = FALSE;
+
+ clear_bit(__IGB_TESTING, &adapter->state);
+ if (if_running)
+ dev_open(netdev);
+ } else {
+ dev_info(pci_dev_to_dev(adapter->pdev), "online testing starting\n");
+
+ /* PHY is powered down when interface is down */
+ if (if_running && igb_link_test(adapter, &data[4]))
+ eth_test->flags |= ETH_TEST_FL_FAILED;
+ else
+ data[4] = 0;
+
+ /* Online tests aren't run; pass by default */
+ data[0] = 0;
+ data[1] = 0;
+ data[2] = 0;
+ data[3] = 0;
+
+ clear_bit(__IGB_TESTING, &adapter->state);
+ }
+ msleep_interruptible(4 * 1000);
+}
+
+static void igb_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+
+ wol->supported = WAKE_UCAST | WAKE_MCAST |
+ WAKE_BCAST | WAKE_MAGIC |
+ WAKE_PHY;
+ wol->wolopts = 0;
+
+ if (!(adapter->flags & IGB_FLAG_WOL_SUPPORTED))
+ return;
+
+ /* apply any specific unsupported masks here */
+ switch (adapter->hw.device_id) {
+ default:
+ break;
+ }
+
+ if (adapter->wol & E1000_WUFC_EX)
+ wol->wolopts |= WAKE_UCAST;
+ if (adapter->wol & E1000_WUFC_MC)
+ wol->wolopts |= WAKE_MCAST;
+ if (adapter->wol & E1000_WUFC_BC)
+ wol->wolopts |= WAKE_BCAST;
+ if (adapter->wol & E1000_WUFC_MAG)
+ wol->wolopts |= WAKE_MAGIC;
+ if (adapter->wol & E1000_WUFC_LNKC)
+ wol->wolopts |= WAKE_PHY;
+}
+
+static int igb_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+
+ if (wol->wolopts & (WAKE_ARP | WAKE_MAGICSECURE))
+ return -EOPNOTSUPP;
+
+ if (!(adapter->flags & IGB_FLAG_WOL_SUPPORTED))
+ return wol->wolopts ? -EOPNOTSUPP : 0;
+
+ /* these settings will always override what we currently have */
+ adapter->wol = 0;
+
+ if (wol->wolopts & WAKE_UCAST)
+ adapter->wol |= E1000_WUFC_EX;
+ if (wol->wolopts & WAKE_MCAST)
+ adapter->wol |= E1000_WUFC_MC;
+ if (wol->wolopts & WAKE_BCAST)
+ adapter->wol |= E1000_WUFC_BC;
+ if (wol->wolopts & WAKE_MAGIC)
+ adapter->wol |= E1000_WUFC_MAG;
+ if (wol->wolopts & WAKE_PHY)
+ adapter->wol |= E1000_WUFC_LNKC;
+ device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
+
+ return 0;
+}
+
+/* bit defines for adapter->led_status */
+#ifdef HAVE_ETHTOOL_SET_PHYS_ID
+static int igb_set_phys_id(struct net_device *netdev,
+ enum ethtool_phys_id_state state)
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+ struct e1000_hw *hw = &adapter->hw;
+
+ switch (state) {
+ case ETHTOOL_ID_ACTIVE:
+ e1000_blink_led(hw);
+ return 2;
+ case ETHTOOL_ID_ON:
+ e1000_led_on(hw);
+ break;
+ case ETHTOOL_ID_OFF:
+ e1000_led_off(hw);
+ break;
+ case ETHTOOL_ID_INACTIVE:
+ e1000_led_off(hw);
+ e1000_cleanup_led(hw);
+ break;
+ }
+
+ return 0;
+}
+#else
+static int igb_phys_id(struct net_device *netdev, u32 data)
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+ struct e1000_hw *hw = &adapter->hw;
+ unsigned long timeout;
+
+ timeout = data * 1000;
+
+ /*
+ * msleep_interruptable only accepts unsigned int so we are limited
+ * in how long a duration we can wait
+ */
+ if (!timeout || timeout > UINT_MAX)
+ timeout = UINT_MAX;
+
+ e1000_blink_led(hw);
+ msleep_interruptible(timeout);
+
+ e1000_led_off(hw);
+ e1000_cleanup_led(hw);
+
+ return 0;
+}
+#endif /* HAVE_ETHTOOL_SET_PHYS_ID */
+
+static int igb_set_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *ec)
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+ int i;
+
+ if ((ec->rx_coalesce_usecs > IGB_MAX_ITR_USECS) ||
+ ((ec->rx_coalesce_usecs > 3) &&
+ (ec->rx_coalesce_usecs < IGB_MIN_ITR_USECS)) ||
+ (ec->rx_coalesce_usecs == 2))
+ {
+ printk("set_coalesce:invalid parameter..");
+ return -EINVAL;
+ }
+
+ if ((ec->tx_coalesce_usecs > IGB_MAX_ITR_USECS) ||
+ ((ec->tx_coalesce_usecs > 3) &&
+ (ec->tx_coalesce_usecs < IGB_MIN_ITR_USECS)) ||
+ (ec->tx_coalesce_usecs == 2))
+ return -EINVAL;
+
+ if ((adapter->flags & IGB_FLAG_QUEUE_PAIRS) && ec->tx_coalesce_usecs)
+ return -EINVAL;
+
+ if (ec->tx_max_coalesced_frames_irq)
+ adapter->tx_work_limit = ec->tx_max_coalesced_frames_irq;
+
+ /* If ITR is disabled, disable DMAC */
+ if (ec->rx_coalesce_usecs == 0) {
+ adapter->dmac = IGB_DMAC_DISABLE;
+ }
+
+ /* convert to rate of irq's per second */
+ if (ec->rx_coalesce_usecs && ec->rx_coalesce_usecs <= 3)
+ adapter->rx_itr_setting = ec->rx_coalesce_usecs;
+ else
+ adapter->rx_itr_setting = ec->rx_coalesce_usecs << 2;
+
+ /* convert to rate of irq's per second */
+ if (adapter->flags & IGB_FLAG_QUEUE_PAIRS)
+ adapter->tx_itr_setting = adapter->rx_itr_setting;
+ else if (ec->tx_coalesce_usecs && ec->tx_coalesce_usecs <= 3)
+ adapter->tx_itr_setting = ec->tx_coalesce_usecs;
+ else
+ adapter->tx_itr_setting = ec->tx_coalesce_usecs << 2;
+
+ for (i = 0; i < adapter->num_q_vectors; i++) {
+ struct igb_q_vector *q_vector = adapter->q_vector[i];
+ q_vector->tx.work_limit = adapter->tx_work_limit;
+ if (q_vector->rx.ring)
+ q_vector->itr_val = adapter->rx_itr_setting;
+ else
+ q_vector->itr_val = adapter->tx_itr_setting;
+ if (q_vector->itr_val && q_vector->itr_val <= 3)
+ q_vector->itr_val = IGB_START_ITR;
+ q_vector->set_itr = 1;
+ }
+
+ return 0;
+}
+
+static int igb_get_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *ec)
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+
+ if (adapter->rx_itr_setting <= 3)
+ ec->rx_coalesce_usecs = adapter->rx_itr_setting;
+ else
+ ec->rx_coalesce_usecs = adapter->rx_itr_setting >> 2;
+
+ ec->tx_max_coalesced_frames_irq = adapter->tx_work_limit;
+
+ if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS)) {
+ if (adapter->tx_itr_setting <= 3)
+ ec->tx_coalesce_usecs = adapter->tx_itr_setting;
+ else
+ ec->tx_coalesce_usecs = adapter->tx_itr_setting >> 2;
+ }
+
+ return 0;
+}
+
+static int igb_nway_reset(struct net_device *netdev)
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+ if (netif_running(netdev))
+ igb_reinit_locked(adapter);
+ return 0;
+}
+
+#ifdef HAVE_ETHTOOL_GET_SSET_COUNT
+static int igb_get_sset_count(struct net_device *netdev, int sset)
+{
+ switch (sset) {
+ case ETH_SS_STATS:
+ return IGB_STATS_LEN;
+ case ETH_SS_TEST:
+ return IGB_TEST_LEN;
+ default:
+ return -ENOTSUPP;
+ }
+}
+#else
+static int igb_get_stats_count(struct net_device *netdev)
+{
+ return IGB_STATS_LEN;
+}
+
+static int igb_diag_test_count(struct net_device *netdev)
+{
+ return IGB_TEST_LEN;
+}
+#endif
+
+static void igb_get_ethtool_stats(struct net_device *netdev,
+ struct ethtool_stats *stats, u64 *data)
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+#ifdef HAVE_NETDEV_STATS_IN_NETDEV
+ struct net_device_stats *net_stats = &netdev->stats;
+#else
+ struct net_device_stats *net_stats = &adapter->net_stats;
+#endif
+ u64 *queue_stat;
+ int i, j, k;
+ char *p;
+
+ igb_update_stats(adapter);
+
+ for (i = 0; i < IGB_GLOBAL_STATS_LEN; i++) {
+ p = (char *)adapter + igb_gstrings_stats[i].stat_offset;
+ data[i] = (igb_gstrings_stats[i].sizeof_stat ==
+ sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
+ }
+ for (j = 0; j < IGB_NETDEV_STATS_LEN; j++, i++) {
+ p = (char *)net_stats + igb_gstrings_net_stats[j].stat_offset;
+ data[i] = (igb_gstrings_net_stats[j].sizeof_stat ==
+ sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
+ }
+ for (j = 0; j < adapter->num_tx_queues; j++) {
+ queue_stat = (u64 *)&adapter->tx_ring[j]->tx_stats;
+ for (k = 0; k < IGB_TX_QUEUE_STATS_LEN; k++, i++)
+ data[i] = queue_stat[k];
+ }
+ for (j = 0; j < adapter->num_rx_queues; j++) {
+ queue_stat = (u64 *)&adapter->rx_ring[j]->rx_stats;
+ for (k = 0; k < IGB_RX_QUEUE_STATS_LEN; k++, i++)
+ data[i] = queue_stat[k];
+ }
+}
+
+static void igb_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+ u8 *p = data;
+ int i;
+
+ switch (stringset) {
+ case ETH_SS_TEST:
+ memcpy(data, *igb_gstrings_test,
+ IGB_TEST_LEN*ETH_GSTRING_LEN);
+ break;
+ case ETH_SS_STATS:
+ for (i = 0; i < IGB_GLOBAL_STATS_LEN; i++) {
+ memcpy(p, igb_gstrings_stats[i].stat_string,
+ ETH_GSTRING_LEN);
+ p += ETH_GSTRING_LEN;
+ }
+ for (i = 0; i < IGB_NETDEV_STATS_LEN; i++) {
+ memcpy(p, igb_gstrings_net_stats[i].stat_string,
+ ETH_GSTRING_LEN);
+ p += ETH_GSTRING_LEN;
+ }
+ for (i = 0; i < adapter->num_tx_queues; i++) {
+ sprintf(p, "tx_queue_%u_packets", i);
+ p += ETH_GSTRING_LEN;
+ sprintf(p, "tx_queue_%u_bytes", i);
+ p += ETH_GSTRING_LEN;
+ sprintf(p, "tx_queue_%u_restart", i);
+ p += ETH_GSTRING_LEN;
+ }
+ for (i = 0; i < adapter->num_rx_queues; i++) {
+ sprintf(p, "rx_queue_%u_packets", i);
+ p += ETH_GSTRING_LEN;
+ sprintf(p, "rx_queue_%u_bytes", i);
+ p += ETH_GSTRING_LEN;
+ sprintf(p, "rx_queue_%u_drops", i);
+ p += ETH_GSTRING_LEN;
+ sprintf(p, "rx_queue_%u_csum_err", i);
+ p += ETH_GSTRING_LEN;
+ sprintf(p, "rx_queue_%u_alloc_failed", i);
+ p += ETH_GSTRING_LEN;
+ sprintf(p, "rx_queue_%u_ipv4_packets", i);
+ p += ETH_GSTRING_LEN;
+ sprintf(p, "rx_queue_%u_ipv4e_packets", i);
+ p += ETH_GSTRING_LEN;
+ sprintf(p, "rx_queue_%u_ipv6_packets", i);
+ p += ETH_GSTRING_LEN;
+ sprintf(p, "rx_queue_%u_ipv6e_packets", i);
+ p += ETH_GSTRING_LEN;
+ sprintf(p, "rx_queue_%u_tcp_packets", i);
+ p += ETH_GSTRING_LEN;
+ sprintf(p, "rx_queue_%u_udp_packets", i);
+ p += ETH_GSTRING_LEN;
+ sprintf(p, "rx_queue_%u_sctp_packets", i);
+ p += ETH_GSTRING_LEN;
+ sprintf(p, "rx_queue_%u_nfs_packets", i);
+ p += ETH_GSTRING_LEN;
+ }
+/* BUG_ON(p - data != IGB_STATS_LEN * ETH_GSTRING_LEN); */
+ break;
+ }
+}
+
+#ifdef HAVE_ETHTOOL_GET_TS_INFO
+static int igb_get_ts_info(struct net_device *dev,
+ struct ethtool_ts_info *info)
+{
+ struct igb_adapter *adapter = netdev_priv(dev);
+
+ switch (adapter->hw.mac.type) {
+#ifdef HAVE_PTP_1588_CLOCK
+ case e1000_82575:
+ info->so_timestamping =
+ SOF_TIMESTAMPING_TX_SOFTWARE |
+ SOF_TIMESTAMPING_RX_SOFTWARE |
+ SOF_TIMESTAMPING_SOFTWARE;
+ return 0;
+ case e1000_82576:
+ case e1000_82580:
+ case e1000_i350:
+ case e1000_i354:
+ case e1000_i210:
+ case e1000_i211:
+ info->so_timestamping =
+ SOF_TIMESTAMPING_TX_SOFTWARE |
+ SOF_TIMESTAMPING_RX_SOFTWARE |
+ SOF_TIMESTAMPING_SOFTWARE |
+ SOF_TIMESTAMPING_TX_HARDWARE |
+ SOF_TIMESTAMPING_RX_HARDWARE |
+ SOF_TIMESTAMPING_RAW_HARDWARE;
+
+ if (adapter->ptp_clock)
+ info->phc_index = ptp_clock_index(adapter->ptp_clock);
+ else
+ info->phc_index = -1;
+
+ info->tx_types =
+ (1 << HWTSTAMP_TX_OFF) |
+ (1 << HWTSTAMP_TX_ON);
+
+ info->rx_filters = 1 << HWTSTAMP_FILTER_NONE;
+
+ /* 82576 does not support timestamping all packets. */
+ if (adapter->hw.mac.type >= e1000_82580)
+ info->rx_filters |= 1 << HWTSTAMP_FILTER_ALL;
+ else
+ info->rx_filters |=
+ (1 << HWTSTAMP_FILTER_PTP_V1_L4_SYNC) |
+ (1 << HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_L2_SYNC) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_L4_SYNC) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_EVENT);
+
+ return 0;
+#endif /* HAVE_PTP_1588_CLOCK */
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+#endif /* HAVE_ETHTOOL_GET_TS_INFO */
+
+#ifdef CONFIG_PM_RUNTIME
+static int igb_ethtool_begin(struct net_device *netdev)
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+
+ pm_runtime_get_sync(&adapter->pdev->dev);
+
+ return 0;
+}
+
+static void igb_ethtool_complete(struct net_device *netdev)
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+
+ pm_runtime_put(&adapter->pdev->dev);
+}
+#endif /* CONFIG_PM_RUNTIME */
+
+#ifndef HAVE_NDO_SET_FEATURES
+static u32 igb_get_rx_csum(struct net_device *netdev)
+{
+ return !!(netdev->features & NETIF_F_RXCSUM);
+}
+
+static int igb_set_rx_csum(struct net_device *netdev, u32 data)
+{
+ const u32 feature_list = NETIF_F_RXCSUM;
+
+ if (data)
+ netdev->features |= feature_list;
+ else
+ netdev->features &= ~feature_list;
+
+ return 0;
+}
+
+static int igb_set_tx_csum(struct net_device *netdev, u32 data)
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+#ifdef NETIF_F_IPV6_CSUM
+ u32 feature_list = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
+#else
+ u32 feature_list = NETIF_F_IP_CSUM;
+#endif
+
+ if (adapter->hw.mac.type >= e1000_82576)
+ feature_list |= NETIF_F_SCTP_CSUM;
+
+ if (data)
+ netdev->features |= feature_list;
+ else
+ netdev->features &= ~feature_list;
+
+ return 0;
+}
+
+#ifdef NETIF_F_TSO
+static int igb_set_tso(struct net_device *netdev, u32 data)
+{
+#ifdef NETIF_F_TSO6
+ const u32 feature_list = NETIF_F_TSO | NETIF_F_TSO6;
+#else
+ const u32 feature_list = NETIF_F_TSO;
+#endif
+
+ if (data)
+ netdev->features |= feature_list;
+ else
+ netdev->features &= ~feature_list;
+
+#ifndef HAVE_NETDEV_VLAN_FEATURES
+ if (!data) {
+ struct igb_adapter *adapter = netdev_priv(netdev);
+ struct net_device *v_netdev;
+ int i;
+
+ /* disable TSO on all VLANs if they're present */
+ if (!adapter->vlgrp)
+ goto tso_out;
+
+ for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
+ v_netdev = vlan_group_get_device(adapter->vlgrp, i);
+ if (!v_netdev)
+ continue;
+
+ v_netdev->features &= ~feature_list;
+ vlan_group_set_device(adapter->vlgrp, i, v_netdev);
+ }
+ }
+
+tso_out:
+
+#endif /* HAVE_NETDEV_VLAN_FEATURES */
+ return 0;
+}
+
+#endif /* NETIF_F_TSO */
+#ifdef ETHTOOL_GFLAGS
+static int igb_set_flags(struct net_device *netdev, u32 data)
+{
+ u32 supported_flags = ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN |
+ ETH_FLAG_RXHASH;
+#ifndef HAVE_VLAN_RX_REGISTER
+ u32 changed = netdev->features ^ data;
+#endif
+ int rc;
+#ifndef IGB_NO_LRO
+
+ supported_flags |= ETH_FLAG_LRO;
+#endif
+ /*
+ * Since there is no support for separate tx vlan accel
+ * enabled make sure tx flag is cleared if rx is.
+ */
+ if (!(data & ETH_FLAG_RXVLAN))
+ data &= ~ETH_FLAG_TXVLAN;
+
+ rc = ethtool_op_set_flags(netdev, data, supported_flags);
+ if (rc)
+ return rc;
+#ifndef HAVE_VLAN_RX_REGISTER
+
+ if (changed & ETH_FLAG_RXVLAN)
+ igb_vlan_mode(netdev, data);
+#endif
+
+ return 0;
+}
+
+#endif /* ETHTOOL_GFLAGS */
+#endif /* HAVE_NDO_SET_FEATURES */
+#ifdef ETHTOOL_SADV_COAL
+static int igb_set_adv_coal(struct net_device *netdev, struct ethtool_value *edata)
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+
+ switch (edata->data) {
+ case IGB_DMAC_DISABLE:
+ adapter->dmac = edata->data;
+ break;
+ case IGB_DMAC_MIN:
+ adapter->dmac = edata->data;
+ break;
+ case IGB_DMAC_500:
+ adapter->dmac = edata->data;
+ break;
+ case IGB_DMAC_EN_DEFAULT:
+ adapter->dmac = edata->data;
+ break;
+ case IGB_DMAC_2000:
+ adapter->dmac = edata->data;
+ break;
+ case IGB_DMAC_3000:
+ adapter->dmac = edata->data;
+ break;
+ case IGB_DMAC_4000:
+ adapter->dmac = edata->data;
+ break;
+ case IGB_DMAC_5000:
+ adapter->dmac = edata->data;
+ break;
+ case IGB_DMAC_6000:
+ adapter->dmac = edata->data;
+ break;
+ case IGB_DMAC_7000:
+ adapter->dmac = edata->data;
+ break;
+ case IGB_DMAC_8000:
+ adapter->dmac = edata->data;
+ break;
+ case IGB_DMAC_9000:
+ adapter->dmac = edata->data;
+ break;
+ case IGB_DMAC_MAX:
+ adapter->dmac = edata->data;
+ break;
+ default:
+ adapter->dmac = IGB_DMAC_DISABLE;
+ printk("set_dmac: invalid setting, setting DMAC to %d\n",
+ adapter->dmac);
+ }
+ printk("%s: setting DMAC to %d\n", netdev->name, adapter->dmac);
+ return 0;
+}
+#endif /* ETHTOOL_SADV_COAL */
+#ifdef ETHTOOL_GADV_COAL
+static void igb_get_dmac(struct net_device *netdev,
+ struct ethtool_value *edata)
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+ edata->data = adapter->dmac;
+
+ return;
+}
+#endif
+
+#ifdef ETHTOOL_GEEE
+static int igb_get_eee(struct net_device *netdev, struct ethtool_eee *edata)
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+ struct e1000_hw *hw = &adapter->hw;
+ u32 ret_val;
+ u16 phy_data;
+
+ if ((hw->mac.type < e1000_i350) ||
+ (hw->phy.media_type != e1000_media_type_copper))
+ return -EOPNOTSUPP;
+
+ edata->supported = (SUPPORTED_1000baseT_Full |
+ SUPPORTED_100baseT_Full);
+
+ if (!hw->dev_spec._82575.eee_disable)
+ edata->advertised =
+ mmd_eee_adv_to_ethtool_adv_t(adapter->eee_advert);
+
+ /* The IPCNFG and EEER registers are not supported on I354. */
+ if (hw->mac.type == e1000_i354) {
+ e1000_get_eee_status_i354(hw, (bool *)&edata->eee_active);
+ } else {
+ u32 eeer;
+
+ eeer = E1000_READ_REG(hw, E1000_EEER);
+
+ /* EEE status on negotiated link */
+ if (eeer & E1000_EEER_EEE_NEG)
+ edata->eee_active = true;
+
+ if (eeer & E1000_EEER_TX_LPI_EN)
+ edata->tx_lpi_enabled = true;
+ }
+
+ /* EEE Link Partner Advertised */
+ switch (hw->mac.type) {
+ case e1000_i350:
+ ret_val = e1000_read_emi_reg(hw, E1000_EEE_LP_ADV_ADDR_I350,
+ &phy_data);
+ if (ret_val)
+ return -ENODATA;
+
+ edata->lp_advertised = mmd_eee_adv_to_ethtool_adv_t(phy_data);
+
+ break;
+ case e1000_i354:
+ case e1000_i210:
+ case e1000_i211:
+ ret_val = e1000_read_xmdio_reg(hw, E1000_EEE_LP_ADV_ADDR_I210,
+ E1000_EEE_LP_ADV_DEV_I210,
+ &phy_data);
+ if (ret_val)
+ return -ENODATA;
+
+ edata->lp_advertised = mmd_eee_adv_to_ethtool_adv_t(phy_data);
+
+ break;
+ default:
+ break;
+ }
+
+ edata->eee_enabled = !hw->dev_spec._82575.eee_disable;
+
+ if ((hw->mac.type == e1000_i354) &&
+ (edata->eee_enabled))
+ edata->tx_lpi_enabled = true;
+
+ /*
+ * report correct negotiated EEE status for devices that
+ * wrongly report EEE at half-duplex
+ */
+ if (adapter->link_duplex == HALF_DUPLEX) {
+ edata->eee_enabled = false;
+ edata->eee_active = false;
+ edata->tx_lpi_enabled = false;
+ edata->advertised &= ~edata->advertised;
+ }
+
+ return 0;
+}
+#endif
+
+#ifdef ETHTOOL_SEEE
+static int igb_set_eee(struct net_device *netdev,
+ struct ethtool_eee *edata)
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+ struct e1000_hw *hw = &adapter->hw;
+ struct ethtool_eee eee_curr;
+ s32 ret_val;
+
+ if ((hw->mac.type < e1000_i350) ||
+ (hw->phy.media_type != e1000_media_type_copper))
+ return -EOPNOTSUPP;
+
+ ret_val = igb_get_eee(netdev, &eee_curr);
+ if (ret_val)
+ return ret_val;
+
+ if (eee_curr.eee_enabled) {
+ if (eee_curr.tx_lpi_enabled != edata->tx_lpi_enabled) {
+ dev_err(pci_dev_to_dev(adapter->pdev),
+ "Setting EEE tx-lpi is not supported\n");
+ return -EINVAL;
+ }
+
+ /* Tx LPI time is not implemented currently */
+ if (edata->tx_lpi_timer) {
+ dev_err(pci_dev_to_dev(adapter->pdev),
+ "Setting EEE Tx LPI timer is not supported\n");
+ return -EINVAL;
+ }
+
+ if (edata->advertised &
+ ~(ADVERTISE_100_FULL | ADVERTISE_1000_FULL)) {
+ dev_err(pci_dev_to_dev(adapter->pdev),
+ "EEE Advertisement supports only 100Tx and or 100T full duplex\n");
+ return -EINVAL;
+ }
+
+ } else if (!edata->eee_enabled) {
+ dev_err(pci_dev_to_dev(adapter->pdev),
+ "Setting EEE options is not supported with EEE disabled\n");
+ return -EINVAL;
+ }
+
+ adapter->eee_advert = ethtool_adv_to_mmd_eee_adv_t(edata->advertised);
+
+ if (hw->dev_spec._82575.eee_disable != !edata->eee_enabled) {
+ hw->dev_spec._82575.eee_disable = !edata->eee_enabled;
+
+ /* reset link */
+ if (netif_running(netdev))
+ igb_reinit_locked(adapter);
+ else
+ igb_reset(adapter);
+ }
+
+ return 0;
+}
+#endif /* ETHTOOL_SEEE */
+
+#ifdef ETHTOOL_GRXRINGS
+static int igb_get_rss_hash_opts(struct igb_adapter *adapter,
+ struct ethtool_rxnfc *cmd)
+{
+ cmd->data = 0;
+
+ /* Report default options for RSS on igb */
+ switch (cmd->flow_type) {
+ case TCP_V4_FLOW:
+ cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+ case UDP_V4_FLOW:
+ if (adapter->flags & IGB_FLAG_RSS_FIELD_IPV4_UDP)
+ cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+ case SCTP_V4_FLOW:
+ case AH_ESP_V4_FLOW:
+ case AH_V4_FLOW:
+ case ESP_V4_FLOW:
+ case IPV4_FLOW:
+ cmd->data |= RXH_IP_SRC | RXH_IP_DST;
+ break;
+ case TCP_V6_FLOW:
+ cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+ case UDP_V6_FLOW:
+ if (adapter->flags & IGB_FLAG_RSS_FIELD_IPV6_UDP)
+ cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+ case SCTP_V6_FLOW:
+ case AH_ESP_V6_FLOW:
+ case AH_V6_FLOW:
+ case ESP_V6_FLOW:
+ case IPV6_FLOW:
+ cmd->data |= RXH_IP_SRC | RXH_IP_DST;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int igb_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
+#ifdef HAVE_ETHTOOL_GET_RXNFC_VOID_RULE_LOCS
+ void *rule_locs)
+#else
+ u32 *rule_locs)
+#endif
+{
+ struct igb_adapter *adapter = netdev_priv(dev);
+ int ret = -EOPNOTSUPP;
+
+ switch (cmd->cmd) {
+ case ETHTOOL_GRXRINGS:
+ cmd->data = adapter->num_rx_queues;
+ ret = 0;
+ break;
+ case ETHTOOL_GRXFH:
+ ret = igb_get_rss_hash_opts(adapter, cmd);
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
+#define UDP_RSS_FLAGS (IGB_FLAG_RSS_FIELD_IPV4_UDP | \
+ IGB_FLAG_RSS_FIELD_IPV6_UDP)
+static int igb_set_rss_hash_opt(struct igb_adapter *adapter,
+ struct ethtool_rxnfc *nfc)
+{
+ u32 flags = adapter->flags;
+
+ /*
+ * RSS does not support anything other than hashing
+ * to queues on src and dst IPs and ports
+ */
+ if (nfc->data & ~(RXH_IP_SRC | RXH_IP_DST |
+ RXH_L4_B_0_1 | RXH_L4_B_2_3))
+ return -EINVAL;
+
+ switch (nfc->flow_type) {
+ case TCP_V4_FLOW:
+ case TCP_V6_FLOW:
+ if (!(nfc->data & RXH_IP_SRC) ||
+ !(nfc->data & RXH_IP_DST) ||
+ !(nfc->data & RXH_L4_B_0_1) ||
+ !(nfc->data & RXH_L4_B_2_3))
+ return -EINVAL;
+ break;
+ case UDP_V4_FLOW:
+ if (!(nfc->data & RXH_IP_SRC) ||
+ !(nfc->data & RXH_IP_DST))
+ return -EINVAL;
+ switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
+ case 0:
+ flags &= ~IGB_FLAG_RSS_FIELD_IPV4_UDP;
+ break;
+ case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
+ flags |= IGB_FLAG_RSS_FIELD_IPV4_UDP;
+ break;
+ default:
+ return -EINVAL;
+ }
+ break;
+ case UDP_V6_FLOW:
+ if (!(nfc->data & RXH_IP_SRC) ||
+ !(nfc->data & RXH_IP_DST))
+ return -EINVAL;
+ switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
+ case 0:
+ flags &= ~IGB_FLAG_RSS_FIELD_IPV6_UDP;
+ break;
+ case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
+ flags |= IGB_FLAG_RSS_FIELD_IPV6_UDP;
+ break;
+ default:
+ return -EINVAL;
+ }
+ break;
+ case AH_ESP_V4_FLOW:
+ case AH_V4_FLOW:
+ case ESP_V4_FLOW:
+ case SCTP_V4_FLOW:
+ case AH_ESP_V6_FLOW:
+ case AH_V6_FLOW:
+ case ESP_V6_FLOW:
+ case SCTP_V6_FLOW:
+ if (!(nfc->data & RXH_IP_SRC) ||
+ !(nfc->data & RXH_IP_DST) ||
+ (nfc->data & RXH_L4_B_0_1) ||
+ (nfc->data & RXH_L4_B_2_3))
+ return -EINVAL;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ /* if we changed something we need to update flags */
+ if (flags != adapter->flags) {
+ struct e1000_hw *hw = &adapter->hw;
+ u32 mrqc = E1000_READ_REG(hw, E1000_MRQC);
+
+ if ((flags & UDP_RSS_FLAGS) &&
+ !(adapter->flags & UDP_RSS_FLAGS))
+ DPRINTK(DRV, WARNING,
+ "enabling UDP RSS: fragmented packets may arrive out of order to the stack above\n");
+
+ adapter->flags = flags;
+
+ /* Perform hash on these packet types */
+ mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
+ E1000_MRQC_RSS_FIELD_IPV4_TCP |
+ E1000_MRQC_RSS_FIELD_IPV6 |
+ E1000_MRQC_RSS_FIELD_IPV6_TCP;
+
+ mrqc &= ~(E1000_MRQC_RSS_FIELD_IPV4_UDP |
+ E1000_MRQC_RSS_FIELD_IPV6_UDP);
+
+ if (flags & IGB_FLAG_RSS_FIELD_IPV4_UDP)
+ mrqc |= E1000_MRQC_RSS_FIELD_IPV4_UDP;
+
+ if (flags & IGB_FLAG_RSS_FIELD_IPV6_UDP)
+ mrqc |= E1000_MRQC_RSS_FIELD_IPV6_UDP;
+
+ E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
+ }
+
+ return 0;
+}
+
+static int igb_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
+{
+ struct igb_adapter *adapter = netdev_priv(dev);
+ int ret = -EOPNOTSUPP;
+
+ switch (cmd->cmd) {
+ case ETHTOOL_SRXFH:
+ ret = igb_set_rss_hash_opt(adapter, cmd);
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+#endif /* ETHTOOL_GRXRINGS */
+
+static const struct ethtool_ops igb_ethtool_ops = {
+ .get_settings = igb_get_settings,
+ .set_settings = igb_set_settings,
+ .get_drvinfo = igb_get_drvinfo,
+ .get_regs_len = igb_get_regs_len,
+ .get_regs = igb_get_regs,
+ .get_wol = igb_get_wol,
+ .set_wol = igb_set_wol,
+ .get_msglevel = igb_get_msglevel,
+ .set_msglevel = igb_set_msglevel,
+ .nway_reset = igb_nway_reset,
+ .get_link = igb_get_link,
+ .get_eeprom_len = igb_get_eeprom_len,
+ .get_eeprom = igb_get_eeprom,
+ .set_eeprom = igb_set_eeprom,
+ .get_ringparam = igb_get_ringparam,
+ .set_ringparam = igb_set_ringparam,
+ .get_pauseparam = igb_get_pauseparam,
+ .set_pauseparam = igb_set_pauseparam,
+ .self_test = igb_diag_test,
+ .get_strings = igb_get_strings,
+#ifndef HAVE_RHEL6_ETHTOOL_OPS_EXT_STRUCT
+#ifdef HAVE_ETHTOOL_SET_PHYS_ID
+ .set_phys_id = igb_set_phys_id,
+#else
+ .phys_id = igb_phys_id,
+#endif /* HAVE_ETHTOOL_SET_PHYS_ID */
+#endif /* HAVE_RHEL6_ETHTOOL_OPS_EXT_STRUCT */
+#ifdef HAVE_ETHTOOL_GET_SSET_COUNT
+ .get_sset_count = igb_get_sset_count,
+#else
+ .get_stats_count = igb_get_stats_count,
+ .self_test_count = igb_diag_test_count,
+#endif
+ .get_ethtool_stats = igb_get_ethtool_stats,
+#ifdef HAVE_ETHTOOL_GET_PERM_ADDR
+ .get_perm_addr = ethtool_op_get_perm_addr,
+#endif
+ .get_coalesce = igb_get_coalesce,
+ .set_coalesce = igb_set_coalesce,
+#ifndef HAVE_RHEL6_ETHTOOL_OPS_EXT_STRUCT
+#ifdef HAVE_ETHTOOL_GET_TS_INFO
+ .get_ts_info = igb_get_ts_info,
+#endif /* HAVE_ETHTOOL_GET_TS_INFO */
+#endif /* HAVE_RHEL6_ETHTOOL_OPS_EXT_STRUCT */
+#ifdef CONFIG_PM_RUNTIME
+ .begin = igb_ethtool_begin,
+ .complete = igb_ethtool_complete,
+#endif /* CONFIG_PM_RUNTIME */
+#ifndef HAVE_NDO_SET_FEATURES
+ .get_rx_csum = igb_get_rx_csum,
+ .set_rx_csum = igb_set_rx_csum,
+ .get_tx_csum = ethtool_op_get_tx_csum,
+ .set_tx_csum = igb_set_tx_csum,
+ .get_sg = ethtool_op_get_sg,
+ .set_sg = ethtool_op_set_sg,
+#ifdef NETIF_F_TSO
+ .get_tso = ethtool_op_get_tso,
+ .set_tso = igb_set_tso,
+#endif
+#ifdef ETHTOOL_GFLAGS
+ .get_flags = ethtool_op_get_flags,
+ .set_flags = igb_set_flags,
+#endif /* ETHTOOL_GFLAGS */
+#endif /* HAVE_NDO_SET_FEATURES */
+#ifdef ETHTOOL_GADV_COAL
+ .get_advcoal = igb_get_adv_coal,
+ .set_advcoal = igb_set_dmac_coal,
+#endif /* ETHTOOL_GADV_COAL */
+#ifndef HAVE_RHEL6_ETHTOOL_OPS_EXT_STRUCT
+#ifdef ETHTOOL_GEEE
+ .get_eee = igb_get_eee,
+#endif
+#ifdef ETHTOOL_SEEE
+ .set_eee = igb_set_eee,
+#endif
+#endif /* HAVE_RHEL6_ETHTOOL_OPS_EXT_STRUCT */
+#ifdef ETHTOOL_GRXRINGS
+ .get_rxnfc = igb_get_rxnfc,
+ .set_rxnfc = igb_set_rxnfc,
+#endif
+};
+
+#ifdef HAVE_RHEL6_ETHTOOL_OPS_EXT_STRUCT
+static const struct ethtool_ops_ext igb_ethtool_ops_ext = {
+ .size = sizeof(struct ethtool_ops_ext),
+ .get_ts_info = igb_get_ts_info,
+ .set_phys_id = igb_set_phys_id,
+ .get_eee = igb_get_eee,
+ .set_eee = igb_set_eee,
+};
+
+void igb_set_ethtool_ops(struct net_device *netdev)
+{
+ SET_ETHTOOL_OPS(netdev, &igb_ethtool_ops);
+ set_ethtool_ops_ext(netdev, &igb_ethtool_ops_ext);
+}
+#else
+void igb_set_ethtool_ops(struct net_device *netdev)
+{
+ /* have to "undeclare" const on this struct to remove warnings */
+ SET_ETHTOOL_OPS(netdev, (struct ethtool_ops *)&igb_ethtool_ops);
+}
+#endif /* HAVE_RHEL6_ETHTOOL_OPS_EXT_STRUCT */
+#endif /* SIOCETHTOOL */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_hwmon.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_hwmon.c
new file mode 100644
index 00000000..07a1ae07
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_hwmon.c
@@ -0,0 +1,260 @@
+/*******************************************************************************
+
+ Intel(R) Gigabit Ethernet Linux driver
+ Copyright(c) 2007-2013 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#include "igb.h"
+#include "e1000_82575.h"
+#include "e1000_hw.h"
+#ifdef IGB_HWMON
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/sysfs.h>
+#include <linux/kobject.h>
+#include <linux/device.h>
+#include <linux/netdevice.h>
+#include <linux/hwmon.h>
+#include <linux/pci.h>
+
+#ifdef HAVE_I2C_SUPPORT
+static struct i2c_board_info i350_sensor_info = {
+ I2C_BOARD_INFO("i350bb", (0Xf8 >> 1)),
+};
+#endif /* HAVE_I2C_SUPPORT */
+
+/* hwmon callback functions */
+static ssize_t igb_hwmon_show_location(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr,
+ dev_attr);
+ return sprintf(buf, "loc%u\n",
+ igb_attr->sensor->location);
+}
+
+static ssize_t igb_hwmon_show_temp(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr,
+ dev_attr);
+ unsigned int value;
+
+ /* reset the temp field */
+ igb_attr->hw->mac.ops.get_thermal_sensor_data(igb_attr->hw);
+
+ value = igb_attr->sensor->temp;
+
+ /* display millidegree */
+ value *= 1000;
+
+ return sprintf(buf, "%u\n", value);
+}
+
+static ssize_t igb_hwmon_show_cautionthresh(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr,
+ dev_attr);
+ unsigned int value = igb_attr->sensor->caution_thresh;
+
+ /* display millidegree */
+ value *= 1000;
+
+ return sprintf(buf, "%u\n", value);
+}
+
+static ssize_t igb_hwmon_show_maxopthresh(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr,
+ dev_attr);
+ unsigned int value = igb_attr->sensor->max_op_thresh;
+
+ /* display millidegree */
+ value *= 1000;
+
+ return sprintf(buf, "%u\n", value);
+}
+
+/* igb_add_hwmon_attr - Create hwmon attr table for a hwmon sysfs file.
+ * @ adapter: pointer to the adapter structure
+ * @ offset: offset in the eeprom sensor data table
+ * @ type: type of sensor data to display
+ *
+ * For each file we want in hwmon's sysfs interface we need a device_attribute
+ * This is included in our hwmon_attr struct that contains the references to
+ * the data structures we need to get the data to display.
+ */
+static int igb_add_hwmon_attr(struct igb_adapter *adapter,
+ unsigned int offset, int type) {
+ int rc;
+ unsigned int n_attr;
+ struct hwmon_attr *igb_attr;
+
+ n_attr = adapter->igb_hwmon_buff.n_hwmon;
+ igb_attr = &adapter->igb_hwmon_buff.hwmon_list[n_attr];
+
+ switch (type) {
+ case IGB_HWMON_TYPE_LOC:
+ igb_attr->dev_attr.show = igb_hwmon_show_location;
+ snprintf(igb_attr->name, sizeof(igb_attr->name),
+ "temp%u_label", offset);
+ break;
+ case IGB_HWMON_TYPE_TEMP:
+ igb_attr->dev_attr.show = igb_hwmon_show_temp;
+ snprintf(igb_attr->name, sizeof(igb_attr->name),
+ "temp%u_input", offset);
+ break;
+ case IGB_HWMON_TYPE_CAUTION:
+ igb_attr->dev_attr.show = igb_hwmon_show_cautionthresh;
+ snprintf(igb_attr->name, sizeof(igb_attr->name),
+ "temp%u_max", offset);
+ break;
+ case IGB_HWMON_TYPE_MAX:
+ igb_attr->dev_attr.show = igb_hwmon_show_maxopthresh;
+ snprintf(igb_attr->name, sizeof(igb_attr->name),
+ "temp%u_crit", offset);
+ break;
+ default:
+ rc = -EPERM;
+ return rc;
+ }
+
+ /* These always the same regardless of type */
+ igb_attr->sensor =
+ &adapter->hw.mac.thermal_sensor_data.sensor[offset];
+ igb_attr->hw = &adapter->hw;
+ igb_attr->dev_attr.store = NULL;
+ igb_attr->dev_attr.attr.mode = S_IRUGO;
+ igb_attr->dev_attr.attr.name = igb_attr->name;
+ sysfs_attr_init(&igb_attr->dev_attr.attr);
+ rc = device_create_file(&adapter->pdev->dev,
+ &igb_attr->dev_attr);
+ if (rc == 0)
+ ++adapter->igb_hwmon_buff.n_hwmon;
+
+ return rc;
+}
+
+static void igb_sysfs_del_adapter(struct igb_adapter *adapter)
+{
+ int i;
+
+ if (adapter == NULL)
+ return;
+
+ for (i = 0; i < adapter->igb_hwmon_buff.n_hwmon; i++) {
+ device_remove_file(&adapter->pdev->dev,
+ &adapter->igb_hwmon_buff.hwmon_list[i].dev_attr);
+ }
+
+ kfree(adapter->igb_hwmon_buff.hwmon_list);
+
+ if (adapter->igb_hwmon_buff.device)
+ hwmon_device_unregister(adapter->igb_hwmon_buff.device);
+}
+
+/* called from igb_main.c */
+void igb_sysfs_exit(struct igb_adapter *adapter)
+{
+ igb_sysfs_del_adapter(adapter);
+}
+
+/* called from igb_main.c */
+int igb_sysfs_init(struct igb_adapter *adapter)
+{
+ struct hwmon_buff *igb_hwmon = &adapter->igb_hwmon_buff;
+ unsigned int i;
+ int n_attrs;
+ int rc = 0;
+#ifdef HAVE_I2C_SUPPORT
+ struct i2c_client *client = NULL;
+#endif /* HAVE_I2C_SUPPORT */
+
+ /* If this method isn't defined we don't support thermals */
+ if (adapter->hw.mac.ops.init_thermal_sensor_thresh == NULL)
+ goto exit;
+
+ /* Don't create thermal hwmon interface if no sensors present */
+ rc = (adapter->hw.mac.ops.init_thermal_sensor_thresh(&adapter->hw));
+ if (rc)
+ goto exit;
+#ifdef HAVE_I2C_SUPPORT
+ /* init i2c_client */
+ client = i2c_new_device(&adapter->i2c_adap, &i350_sensor_info);
+ if (client == NULL) {
+ dev_info(&adapter->pdev->dev,
+ "Failed to create new i2c device..\n");
+ goto exit;
+ }
+ adapter->i2c_client = client;
+#endif /* HAVE_I2C_SUPPORT */
+
+ /* Allocation space for max attributes
+ * max num sensors * values (loc, temp, max, caution)
+ */
+ n_attrs = E1000_MAX_SENSORS * 4;
+ igb_hwmon->hwmon_list = kcalloc(n_attrs, sizeof(struct hwmon_attr),
+ GFP_KERNEL);
+ if (!igb_hwmon->hwmon_list) {
+ rc = -ENOMEM;
+ goto err;
+ }
+
+ igb_hwmon->device = hwmon_device_register(&adapter->pdev->dev);
+ if (IS_ERR(igb_hwmon->device)) {
+ rc = PTR_ERR(igb_hwmon->device);
+ goto err;
+ }
+
+ for (i = 0; i < E1000_MAX_SENSORS; i++) {
+
+ /* Only create hwmon sysfs entries for sensors that have
+ * meaningful data.
+ */
+ if (adapter->hw.mac.thermal_sensor_data.sensor[i].location == 0)
+ continue;
+
+ /* Bail if any hwmon attr struct fails to initialize */
+ rc = igb_add_hwmon_attr(adapter, i, IGB_HWMON_TYPE_CAUTION);
+ rc |= igb_add_hwmon_attr(adapter, i, IGB_HWMON_TYPE_LOC);
+ rc |= igb_add_hwmon_attr(adapter, i, IGB_HWMON_TYPE_TEMP);
+ rc |= igb_add_hwmon_attr(adapter, i, IGB_HWMON_TYPE_MAX);
+ if (rc)
+ goto err;
+ }
+
+ goto exit;
+
+err:
+ igb_sysfs_del_adapter(adapter);
+exit:
+ return rc;
+}
+#endif /* IGB_HWMON */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c
new file mode 100644
index 00000000..96acec58
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c
@@ -0,0 +1,10291 @@
+/*******************************************************************************
+
+ Intel(R) Gigabit Ethernet Linux driver
+ Copyright(c) 2007-2013 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/vmalloc.h>
+#include <linux/pagemap.h>
+#include <linux/netdevice.h>
+#include <linux/tcp.h>
+#ifdef NETIF_F_TSO
+#include <net/checksum.h>
+#ifdef NETIF_F_TSO6
+#include <linux/ipv6.h>
+#include <net/ip6_checksum.h>
+#endif
+#endif
+#ifdef SIOCGMIIPHY
+#include <linux/mii.h>
+#endif
+#ifdef SIOCETHTOOL
+#include <linux/ethtool.h>
+#endif
+#include <linux/if_vlan.h>
+#ifdef CONFIG_PM_RUNTIME
+#include <linux/pm_runtime.h>
+#endif /* CONFIG_PM_RUNTIME */
+
+#include <linux/if_bridge.h>
+#include "igb.h"
+#include "igb_vmdq.h"
+
+#include <linux/uio_driver.h>
+
+#if defined(DEBUG) || defined (DEBUG_DUMP) || defined (DEBUG_ICR) || defined(DEBUG_ITR)
+#define DRV_DEBUG "_debug"
+#else
+#define DRV_DEBUG
+#endif
+#define DRV_HW_PERF
+#define VERSION_SUFFIX
+
+#define MAJ 5
+#define MIN 0
+#define BUILD 6
+#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." __stringify(BUILD) VERSION_SUFFIX DRV_DEBUG DRV_HW_PERF
+
+char igb_driver_name[] = "igb";
+char igb_driver_version[] = DRV_VERSION;
+static const char igb_driver_string[] =
+ "Intel(R) Gigabit Ethernet Network Driver";
+static const char igb_copyright[] =
+ "Copyright (c) 2007-2013 Intel Corporation.";
+
+static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_BACKPLANE_1GBPS) },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_SGMII) },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS) },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_COPPER) },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_FIBER) },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SERDES) },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SGMII) },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_COPPER_FLASHLESS) },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SERDES_FLASHLESS) },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_I211_COPPER) },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER) },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER) },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES) },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII) },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER) },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER) },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER) },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES) },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII) },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL) },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII) },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES) },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE) },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP) },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576) },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS) },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES) },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER) },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES) },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD) },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2) },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER) },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER) },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES) },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER) },
+ /* required last entry */
+ {0, }
+};
+
+//MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
+static void igb_set_sriov_capability(struct igb_adapter *adapter) __attribute__((__unused__));
+void igb_reset(struct igb_adapter *);
+static int igb_setup_all_tx_resources(struct igb_adapter *);
+static int igb_setup_all_rx_resources(struct igb_adapter *);
+static void igb_free_all_tx_resources(struct igb_adapter *);
+static void igb_free_all_rx_resources(struct igb_adapter *);
+static void igb_setup_mrqc(struct igb_adapter *);
+void igb_update_stats(struct igb_adapter *);
+static int igb_probe(struct pci_dev *, const struct pci_device_id *);
+static void __devexit igb_remove(struct pci_dev *pdev);
+static int igb_sw_init(struct igb_adapter *);
+static int igb_open(struct net_device *);
+static int igb_close(struct net_device *);
+static void igb_configure(struct igb_adapter *);
+static void igb_configure_tx(struct igb_adapter *);
+static void igb_configure_rx(struct igb_adapter *);
+static void igb_clean_all_tx_rings(struct igb_adapter *);
+static void igb_clean_all_rx_rings(struct igb_adapter *);
+static void igb_clean_tx_ring(struct igb_ring *);
+static void igb_set_rx_mode(struct net_device *);
+static void igb_update_phy_info(unsigned long);
+static void igb_watchdog(unsigned long);
+static void igb_watchdog_task(struct work_struct *);
+static void igb_dma_err_task(struct work_struct *);
+static void igb_dma_err_timer(unsigned long data);
+static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
+static struct net_device_stats *igb_get_stats(struct net_device *);
+static int igb_change_mtu(struct net_device *, int);
+void igb_full_sync_mac_table(struct igb_adapter *adapter);
+static int igb_set_mac(struct net_device *, void *);
+static void igb_set_uta(struct igb_adapter *adapter);
+static irqreturn_t igb_intr(int irq, void *);
+static irqreturn_t igb_intr_msi(int irq, void *);
+static irqreturn_t igb_msix_other(int irq, void *);
+static irqreturn_t igb_msix_ring(int irq, void *);
+#ifdef IGB_DCA
+static void igb_update_dca(struct igb_q_vector *);
+static void igb_setup_dca(struct igb_adapter *);
+#endif /* IGB_DCA */
+static int igb_poll(struct napi_struct *, int);
+static bool igb_clean_tx_irq(struct igb_q_vector *);
+static bool igb_clean_rx_irq(struct igb_q_vector *, int);
+static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
+static void igb_tx_timeout(struct net_device *);
+static void igb_reset_task(struct work_struct *);
+#ifdef HAVE_VLAN_RX_REGISTER
+static void igb_vlan_mode(struct net_device *, struct vlan_group *);
+#endif
+#ifdef HAVE_VLAN_PROTOCOL
+static int igb_vlan_rx_add_vid(struct net_device *,
+ __be16 proto, u16);
+static int igb_vlan_rx_kill_vid(struct net_device *,
+ __be16 proto, u16);
+#elif defined HAVE_INT_NDO_VLAN_RX_ADD_VID
+#ifdef NETIF_F_HW_VLAN_CTAG_RX
+static int igb_vlan_rx_add_vid(struct net_device *,
+ __always_unused __be16 proto, u16);
+static int igb_vlan_rx_kill_vid(struct net_device *,
+ __always_unused __be16 proto, u16);
+#else
+static int igb_vlan_rx_add_vid(struct net_device *, u16);
+static int igb_vlan_rx_kill_vid(struct net_device *, u16);
+#endif
+#else
+static void igb_vlan_rx_add_vid(struct net_device *, u16);
+static void igb_vlan_rx_kill_vid(struct net_device *, u16);
+#endif
+static void igb_restore_vlan(struct igb_adapter *);
+void igb_rar_set(struct igb_adapter *adapter, u32 index);
+static void igb_ping_all_vfs(struct igb_adapter *);
+static void igb_msg_task(struct igb_adapter *);
+static void igb_vmm_control(struct igb_adapter *);
+static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
+static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
+static void igb_process_mdd_event(struct igb_adapter *);
+#ifdef IFLA_VF_MAX
+static int igb_ndo_set_vf_mac( struct net_device *netdev, int vf, u8 *mac);
+static int igb_ndo_set_vf_vlan(struct net_device *netdev,
+ int vf, u16 vlan, u8 qos);
+#ifdef HAVE_VF_SPOOFCHK_CONFIGURE
+static int igb_ndo_set_vf_spoofchk(struct net_device *netdev, int vf,
+ bool setting);
+#endif
+#ifdef HAVE_VF_MIN_MAX_TXRATE
+static int igb_ndo_set_vf_bw(struct net_device *, int, int, int);
+#else /* HAVE_VF_MIN_MAX_TXRATE */
+static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
+#endif /* HAVE_VF_MIN_MAX_TXRATE */
+static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
+ struct ifla_vf_info *ivi);
+static void igb_check_vf_rate_limit(struct igb_adapter *);
+#endif
+static int igb_vf_configure(struct igb_adapter *adapter, int vf);
+#ifdef CONFIG_PM
+#ifdef HAVE_SYSTEM_SLEEP_PM_OPS
+static int igb_suspend(struct device *dev);
+static int igb_resume(struct device *dev);
+#ifdef CONFIG_PM_RUNTIME
+static int igb_runtime_suspend(struct device *dev);
+static int igb_runtime_resume(struct device *dev);
+static int igb_runtime_idle(struct device *dev);
+#endif /* CONFIG_PM_RUNTIME */
+static const struct dev_pm_ops igb_pm_ops = {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,34)
+ .suspend = igb_suspend,
+ .resume = igb_resume,
+ .freeze = igb_suspend,
+ .thaw = igb_resume,
+ .poweroff = igb_suspend,
+ .restore = igb_resume,
+#ifdef CONFIG_PM_RUNTIME
+ .runtime_suspend = igb_runtime_suspend,
+ .runtime_resume = igb_runtime_resume,
+ .runtime_idle = igb_runtime_idle,
+#endif
+#else /* Linux >= 2.6.34 */
+ SET_SYSTEM_SLEEP_PM_OPS(igb_suspend, igb_resume)
+#ifdef CONFIG_PM_RUNTIME
+ SET_RUNTIME_PM_OPS(igb_runtime_suspend, igb_runtime_resume,
+ igb_runtime_idle)
+#endif /* CONFIG_PM_RUNTIME */
+#endif /* Linux version */
+};
+#else
+static int igb_suspend(struct pci_dev *pdev, pm_message_t state);
+static int igb_resume(struct pci_dev *pdev);
+#endif /* HAVE_SYSTEM_SLEEP_PM_OPS */
+#endif /* CONFIG_PM */
+#ifndef USE_REBOOT_NOTIFIER
+static void igb_shutdown(struct pci_dev *);
+#else
+static int igb_notify_reboot(struct notifier_block *, unsigned long, void *);
+static struct notifier_block igb_notifier_reboot = {
+ .notifier_call = igb_notify_reboot,
+ .next = NULL,
+ .priority = 0
+};
+#endif
+#ifdef IGB_DCA
+static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
+static struct notifier_block dca_notifier = {
+ .notifier_call = igb_notify_dca,
+ .next = NULL,
+ .priority = 0
+};
+#endif
+#ifdef CONFIG_NET_POLL_CONTROLLER
+/* for netdump / net console */
+static void igb_netpoll(struct net_device *);
+#endif
+
+#ifdef HAVE_PCI_ERS
+static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
+ pci_channel_state_t);
+static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
+static void igb_io_resume(struct pci_dev *);
+
+static struct pci_error_handlers igb_err_handler = {
+ .error_detected = igb_io_error_detected,
+ .slot_reset = igb_io_slot_reset,
+ .resume = igb_io_resume,
+};
+#endif
+
+static void igb_init_fw(struct igb_adapter *adapter);
+static void igb_init_dmac(struct igb_adapter *adapter, u32 pba);
+
+static struct pci_driver igb_driver = {
+ .name = igb_driver_name,
+ .id_table = igb_pci_tbl,
+ .probe = igb_probe,
+ .remove = __devexit_p(igb_remove),
+#ifdef CONFIG_PM
+#ifdef HAVE_SYSTEM_SLEEP_PM_OPS
+ .driver.pm = &igb_pm_ops,
+#else
+ .suspend = igb_suspend,
+ .resume = igb_resume,
+#endif /* HAVE_SYSTEM_SLEEP_PM_OPS */
+#endif /* CONFIG_PM */
+#ifndef USE_REBOOT_NOTIFIER
+ .shutdown = igb_shutdown,
+#endif
+#ifdef HAVE_PCI_ERS
+ .err_handler = &igb_err_handler
+#endif
+};
+
+//MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
+//MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
+//MODULE_LICENSE("GPL");
+//MODULE_VERSION(DRV_VERSION);
+
+static void igb_vfta_set(struct igb_adapter *adapter, u32 vid, bool add)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ struct e1000_host_mng_dhcp_cookie *mng_cookie = &hw->mng_cookie;
+ u32 index = (vid >> E1000_VFTA_ENTRY_SHIFT) & E1000_VFTA_ENTRY_MASK;
+ u32 mask = 1 << (vid & E1000_VFTA_ENTRY_BIT_SHIFT_MASK);
+ u32 vfta;
+
+ /*
+ * if this is the management vlan the only option is to add it in so
+ * that the management pass through will continue to work
+ */
+ if ((mng_cookie->status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) &&
+ (vid == mng_cookie->vlan_id))
+ add = TRUE;
+
+ vfta = adapter->shadow_vfta[index];
+
+ if (add)
+ vfta |= mask;
+ else
+ vfta &= ~mask;
+
+ e1000_write_vfta(hw, index, vfta);
+ adapter->shadow_vfta[index] = vfta;
+}
+
+static int debug = NETIF_MSG_DRV | NETIF_MSG_PROBE;
+//module_param(debug, int, 0);
+//MODULE_PARM_DESC(debug, "Debug level (0=none, ..., 16=all)");
+
+/**
+ * igb_init_module - Driver Registration Routine
+ *
+ * igb_init_module is the first routine called when the driver is
+ * loaded. All it does is register with the PCI subsystem.
+ **/
+static int __init igb_init_module(void)
+{
+ int ret;
+
+ printk(KERN_INFO "%s - version %s\n",
+ igb_driver_string, igb_driver_version);
+
+ printk(KERN_INFO "%s\n", igb_copyright);
+#ifdef IGB_HWMON
+/* only use IGB_PROCFS if IGB_HWMON is not defined */
+#else
+#ifdef IGB_PROCFS
+ if (igb_procfs_topdir_init())
+ printk(KERN_INFO "Procfs failed to initialize topdir\n");
+#endif /* IGB_PROCFS */
+#endif /* IGB_HWMON */
+
+#ifdef IGB_DCA
+ dca_register_notify(&dca_notifier);
+#endif
+ ret = pci_register_driver(&igb_driver);
+#ifdef USE_REBOOT_NOTIFIER
+ if (ret >= 0) {
+ register_reboot_notifier(&igb_notifier_reboot);
+ }
+#endif
+ return ret;
+}
+
+#undef module_init
+#define module_init(x) static int x(void) __attribute__((__unused__));
+module_init(igb_init_module);
+
+/**
+ * igb_exit_module - Driver Exit Cleanup Routine
+ *
+ * igb_exit_module is called just before the driver is removed
+ * from memory.
+ **/
+static void __exit igb_exit_module(void)
+{
+#ifdef IGB_DCA
+ dca_unregister_notify(&dca_notifier);
+#endif
+#ifdef USE_REBOOT_NOTIFIER
+ unregister_reboot_notifier(&igb_notifier_reboot);
+#endif
+ pci_unregister_driver(&igb_driver);
+
+#ifdef IGB_HWMON
+/* only compile IGB_PROCFS if IGB_HWMON is not defined */
+#else
+#ifdef IGB_PROCFS
+ igb_procfs_topdir_exit();
+#endif /* IGB_PROCFS */
+#endif /* IGB_HWMON */
+}
+
+#undef module_exit
+#define module_exit(x) static void x(void) __attribute__((__unused__));
+module_exit(igb_exit_module);
+
+#define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
+/**
+ * igb_cache_ring_register - Descriptor ring to register mapping
+ * @adapter: board private structure to initialize
+ *
+ * Once we know the feature-set enabled for the device, we'll cache
+ * the register offset the descriptor ring is assigned to.
+ **/
+static void igb_cache_ring_register(struct igb_adapter *adapter)
+{
+ int i = 0, j = 0;
+ u32 rbase_offset = adapter->vfs_allocated_count;
+
+ switch (adapter->hw.mac.type) {
+ case e1000_82576:
+ /* The queues are allocated for virtualization such that VF 0
+ * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
+ * In order to avoid collision we start at the first free queue
+ * and continue consuming queues in the same sequence
+ */
+ if ((adapter->rss_queues > 1) && adapter->vmdq_pools) {
+ for (; i < adapter->rss_queues; i++)
+ adapter->rx_ring[i]->reg_idx = rbase_offset +
+ Q_IDX_82576(i);
+ }
+ case e1000_82575:
+ case e1000_82580:
+ case e1000_i350:
+ case e1000_i354:
+ case e1000_i210:
+ case e1000_i211:
+ default:
+ for (; i < adapter->num_rx_queues; i++)
+ adapter->rx_ring[i]->reg_idx = rbase_offset + i;
+ for (; j < adapter->num_tx_queues; j++)
+ adapter->tx_ring[j]->reg_idx = rbase_offset + j;
+ break;
+ }
+}
+
+static void igb_configure_lli(struct igb_adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ u16 port;
+
+ /* LLI should only be enabled for MSI-X or MSI interrupts */
+ if (!adapter->msix_entries && !(adapter->flags & IGB_FLAG_HAS_MSI))
+ return;
+
+ if (adapter->lli_port) {
+ /* use filter 0 for port */
+ port = htons((u16)adapter->lli_port);
+ E1000_WRITE_REG(hw, E1000_IMIR(0),
+ (port | E1000_IMIR_PORT_IM_EN));
+ E1000_WRITE_REG(hw, E1000_IMIREXT(0),
+ (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
+ }
+
+ if (adapter->flags & IGB_FLAG_LLI_PUSH) {
+ /* use filter 1 for push flag */
+ E1000_WRITE_REG(hw, E1000_IMIR(1),
+ (E1000_IMIR_PORT_BP | E1000_IMIR_PORT_IM_EN));
+ E1000_WRITE_REG(hw, E1000_IMIREXT(1),
+ (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_PSH));
+ }
+
+ if (adapter->lli_size) {
+ /* use filter 2 for size */
+ E1000_WRITE_REG(hw, E1000_IMIR(2),
+ (E1000_IMIR_PORT_BP | E1000_IMIR_PORT_IM_EN));
+ E1000_WRITE_REG(hw, E1000_IMIREXT(2),
+ (adapter->lli_size | E1000_IMIREXT_CTRL_BP));
+ }
+
+}
+
+/**
+ * igb_write_ivar - configure ivar for given MSI-X vector
+ * @hw: pointer to the HW structure
+ * @msix_vector: vector number we are allocating to a given ring
+ * @index: row index of IVAR register to write within IVAR table
+ * @offset: column offset of in IVAR, should be multiple of 8
+ *
+ * This function is intended to handle the writing of the IVAR register
+ * for adapters 82576 and newer. The IVAR table consists of 2 columns,
+ * each containing an cause allocation for an Rx and Tx ring, and a
+ * variable number of rows depending on the number of queues supported.
+ **/
+static void igb_write_ivar(struct e1000_hw *hw, int msix_vector,
+ int index, int offset)
+{
+ u32 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
+
+ /* clear any bits that are currently set */
+ ivar &= ~((u32)0xFF << offset);
+
+ /* write vector and valid bit */
+ ivar |= (msix_vector | E1000_IVAR_VALID) << offset;
+
+ E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
+}
+
+#define IGB_N0_QUEUE -1
+static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
+{
+ struct igb_adapter *adapter = q_vector->adapter;
+ struct e1000_hw *hw = &adapter->hw;
+ int rx_queue = IGB_N0_QUEUE;
+ int tx_queue = IGB_N0_QUEUE;
+ u32 msixbm = 0;
+
+ if (q_vector->rx.ring)
+ rx_queue = q_vector->rx.ring->reg_idx;
+ if (q_vector->tx.ring)
+ tx_queue = q_vector->tx.ring->reg_idx;
+
+ switch (hw->mac.type) {
+ case e1000_82575:
+ /* The 82575 assigns vectors using a bitmask, which matches the
+ bitmask for the EICR/EIMS/EIMC registers. To assign one
+ or more queues to a vector, we write the appropriate bits
+ into the MSIXBM register for that vector. */
+ if (rx_queue > IGB_N0_QUEUE)
+ msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
+ if (tx_queue > IGB_N0_QUEUE)
+ msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
+ if (!adapter->msix_entries && msix_vector == 0)
+ msixbm |= E1000_EIMS_OTHER;
+ E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0), msix_vector, msixbm);
+ q_vector->eims_value = msixbm;
+ break;
+ case e1000_82576:
+ /*
+ * 82576 uses a table that essentially consists of 2 columns
+ * with 8 rows. The ordering is column-major so we use the
+ * lower 3 bits as the row index, and the 4th bit as the
+ * column offset.
+ */
+ if (rx_queue > IGB_N0_QUEUE)
+ igb_write_ivar(hw, msix_vector,
+ rx_queue & 0x7,
+ (rx_queue & 0x8) << 1);
+ if (tx_queue > IGB_N0_QUEUE)
+ igb_write_ivar(hw, msix_vector,
+ tx_queue & 0x7,
+ ((tx_queue & 0x8) << 1) + 8);
+ q_vector->eims_value = 1 << msix_vector;
+ break;
+ case e1000_82580:
+ case e1000_i350:
+ case e1000_i354:
+ case e1000_i210:
+ case e1000_i211:
+ /*
+ * On 82580 and newer adapters the scheme is similar to 82576
+ * however instead of ordering column-major we have things
+ * ordered row-major. So we traverse the table by using
+ * bit 0 as the column offset, and the remaining bits as the
+ * row index.
+ */
+ if (rx_queue > IGB_N0_QUEUE)
+ igb_write_ivar(hw, msix_vector,
+ rx_queue >> 1,
+ (rx_queue & 0x1) << 4);
+ if (tx_queue > IGB_N0_QUEUE)
+ igb_write_ivar(hw, msix_vector,
+ tx_queue >> 1,
+ ((tx_queue & 0x1) << 4) + 8);
+ q_vector->eims_value = 1 << msix_vector;
+ break;
+ default:
+ BUG();
+ break;
+ }
+
+ /* add q_vector eims value to global eims_enable_mask */
+ adapter->eims_enable_mask |= q_vector->eims_value;
+
+ /* configure q_vector to set itr on first interrupt */
+ q_vector->set_itr = 1;
+}
+
+/**
+ * igb_configure_msix - Configure MSI-X hardware
+ *
+ * igb_configure_msix sets up the hardware to properly
+ * generate MSI-X interrupts.
+ **/
+static void igb_configure_msix(struct igb_adapter *adapter)
+{
+ u32 tmp;
+ int i, vector = 0;
+ struct e1000_hw *hw = &adapter->hw;
+
+ adapter->eims_enable_mask = 0;
+
+ /* set vector for other causes, i.e. link changes */
+ switch (hw->mac.type) {
+ case e1000_82575:
+ tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
+ /* enable MSI-X PBA support*/
+ tmp |= E1000_CTRL_EXT_PBA_CLR;
+
+ /* Auto-Mask interrupts upon ICR read. */
+ tmp |= E1000_CTRL_EXT_EIAME;
+ tmp |= E1000_CTRL_EXT_IRCA;
+
+ E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
+
+ /* enable msix_other interrupt */
+ E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0), vector++,
+ E1000_EIMS_OTHER);
+ adapter->eims_other = E1000_EIMS_OTHER;
+
+ break;
+
+ case e1000_82576:
+ case e1000_82580:
+ case e1000_i350:
+ case e1000_i354:
+ case e1000_i210:
+ case e1000_i211:
+ /* Turn on MSI-X capability first, or our settings
+ * won't stick. And it will take days to debug. */
+ E1000_WRITE_REG(hw, E1000_GPIE, E1000_GPIE_MSIX_MODE |
+ E1000_GPIE_PBA | E1000_GPIE_EIAME |
+ E1000_GPIE_NSICR);
+
+ /* enable msix_other interrupt */
+ adapter->eims_other = 1 << vector;
+ tmp = (vector++ | E1000_IVAR_VALID) << 8;
+
+ E1000_WRITE_REG(hw, E1000_IVAR_MISC, tmp);
+ break;
+ default:
+ /* do nothing, since nothing else supports MSI-X */
+ break;
+ } /* switch (hw->mac.type) */
+
+ adapter->eims_enable_mask |= adapter->eims_other;
+
+ for (i = 0; i < adapter->num_q_vectors; i++)
+ igb_assign_vector(adapter->q_vector[i], vector++);
+
+ E1000_WRITE_FLUSH(hw);
+}
+
+/**
+ * igb_request_msix - Initialize MSI-X interrupts
+ *
+ * igb_request_msix allocates MSI-X vectors and requests interrupts from the
+ * kernel.
+ **/
+static int igb_request_msix(struct igb_adapter *adapter)
+{
+ struct net_device *netdev = adapter->netdev;
+ struct e1000_hw *hw = &adapter->hw;
+ int i, err = 0, vector = 0, free_vector = 0;
+
+ err = request_irq(adapter->msix_entries[vector].vector,
+ &igb_msix_other, 0, netdev->name, adapter);
+ if (err)
+ goto err_out;
+
+ for (i = 0; i < adapter->num_q_vectors; i++) {
+ struct igb_q_vector *q_vector = adapter->q_vector[i];
+
+ vector++;
+
+ q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
+
+ if (q_vector->rx.ring && q_vector->tx.ring)
+ sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
+ q_vector->rx.ring->queue_index);
+ else if (q_vector->tx.ring)
+ sprintf(q_vector->name, "%s-tx-%u", netdev->name,
+ q_vector->tx.ring->queue_index);
+ else if (q_vector->rx.ring)
+ sprintf(q_vector->name, "%s-rx-%u", netdev->name,
+ q_vector->rx.ring->queue_index);
+ else
+ sprintf(q_vector->name, "%s-unused", netdev->name);
+
+ err = request_irq(adapter->msix_entries[vector].vector,
+ igb_msix_ring, 0, q_vector->name,
+ q_vector);
+ if (err)
+ goto err_free;
+ }
+
+ igb_configure_msix(adapter);
+ return 0;
+
+err_free:
+ /* free already assigned IRQs */
+ free_irq(adapter->msix_entries[free_vector++].vector, adapter);
+
+ vector--;
+ for (i = 0; i < vector; i++) {
+ free_irq(adapter->msix_entries[free_vector++].vector,
+ adapter->q_vector[i]);
+ }
+err_out:
+ return err;
+}
+
+static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
+{
+ if (adapter->msix_entries) {
+ pci_disable_msix(adapter->pdev);
+ kfree(adapter->msix_entries);
+ adapter->msix_entries = NULL;
+ } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
+ pci_disable_msi(adapter->pdev);
+ }
+}
+
+/**
+ * igb_free_q_vector - Free memory allocated for specific interrupt vector
+ * @adapter: board private structure to initialize
+ * @v_idx: Index of vector to be freed
+ *
+ * This function frees the memory allocated to the q_vector. In addition if
+ * NAPI is enabled it will delete any references to the NAPI struct prior
+ * to freeing the q_vector.
+ **/
+static void igb_free_q_vector(struct igb_adapter *adapter, int v_idx)
+{
+ struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
+
+ if (q_vector->tx.ring)
+ adapter->tx_ring[q_vector->tx.ring->queue_index] = NULL;
+
+ if (q_vector->rx.ring)
+ adapter->tx_ring[q_vector->rx.ring->queue_index] = NULL;
+
+ adapter->q_vector[v_idx] = NULL;
+ netif_napi_del(&q_vector->napi);
+#ifndef IGB_NO_LRO
+ __skb_queue_purge(&q_vector->lrolist.active);
+#endif
+ kfree(q_vector);
+}
+
+/**
+ * igb_free_q_vectors - Free memory allocated for interrupt vectors
+ * @adapter: board private structure to initialize
+ *
+ * This function frees the memory allocated to the q_vectors. In addition if
+ * NAPI is enabled it will delete any references to the NAPI struct prior
+ * to freeing the q_vector.
+ **/
+static void igb_free_q_vectors(struct igb_adapter *adapter)
+{
+ int v_idx = adapter->num_q_vectors;
+
+ adapter->num_tx_queues = 0;
+ adapter->num_rx_queues = 0;
+ adapter->num_q_vectors = 0;
+
+ while (v_idx--)
+ igb_free_q_vector(adapter, v_idx);
+}
+
+/**
+ * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
+ *
+ * This function resets the device so that it has 0 rx queues, tx queues, and
+ * MSI-X interrupts allocated.
+ */
+static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
+{
+ igb_free_q_vectors(adapter);
+ igb_reset_interrupt_capability(adapter);
+}
+
+/**
+ * igb_process_mdd_event
+ * @adapter - board private structure
+ *
+ * Identify a malicious VF, disable the VF TX/RX queues and log a message.
+ */
+static void igb_process_mdd_event(struct igb_adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ u32 lvmmc, vfte, vfre, mdfb;
+ u8 vf_queue;
+
+ lvmmc = E1000_READ_REG(hw, E1000_LVMMC);
+ vf_queue = lvmmc >> 29;
+
+ /* VF index cannot be bigger or equal to VFs allocated */
+ if (vf_queue >= adapter->vfs_allocated_count)
+ return;
+
+ netdev_info(adapter->netdev,
+ "VF %d misbehaved. VF queues are disabled. "
+ "VM misbehavior code is 0x%x\n", vf_queue, lvmmc);
+
+ /* Disable VFTE and VFRE related bits */
+ vfte = E1000_READ_REG(hw, E1000_VFTE);
+ vfte &= ~(1 << vf_queue);
+ E1000_WRITE_REG(hw, E1000_VFTE, vfte);
+
+ vfre = E1000_READ_REG(hw, E1000_VFRE);
+ vfre &= ~(1 << vf_queue);
+ E1000_WRITE_REG(hw, E1000_VFRE, vfre);
+
+ /* Disable MDFB related bit. Clear on write */
+ mdfb = E1000_READ_REG(hw, E1000_MDFB);
+ mdfb |= (1 << vf_queue);
+ E1000_WRITE_REG(hw, E1000_MDFB, mdfb);
+
+ /* Reset the specific VF */
+ E1000_WRITE_REG(hw, E1000_VTCTRL(vf_queue), E1000_VTCTRL_RST);
+}
+
+/**
+ * igb_disable_mdd
+ * @adapter - board private structure
+ *
+ * Disable MDD behavior in the HW
+ **/
+static void igb_disable_mdd(struct igb_adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ u32 reg;
+
+ if ((hw->mac.type != e1000_i350) ||
+ (hw->mac.type != e1000_i354))
+ return;
+
+ reg = E1000_READ_REG(hw, E1000_DTXCTL);
+ reg &= (~E1000_DTXCTL_MDP_EN);
+ E1000_WRITE_REG(hw, E1000_DTXCTL, reg);
+}
+
+/**
+ * igb_enable_mdd
+ * @adapter - board private structure
+ *
+ * Enable the HW to detect malicious driver and sends an interrupt to
+ * the driver.
+ **/
+static void igb_enable_mdd(struct igb_adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ u32 reg;
+
+ /* Only available on i350 device */
+ if (hw->mac.type != e1000_i350)
+ return;
+
+ reg = E1000_READ_REG(hw, E1000_DTXCTL);
+ reg |= E1000_DTXCTL_MDP_EN;
+ E1000_WRITE_REG(hw, E1000_DTXCTL, reg);
+}
+
+/**
+ * igb_reset_sriov_capability - disable SR-IOV if enabled
+ *
+ * Attempt to disable single root IO virtualization capabilites present in the
+ * kernel.
+ **/
+static void igb_reset_sriov_capability(struct igb_adapter *adapter)
+{
+ struct pci_dev *pdev = adapter->pdev;
+ struct e1000_hw *hw = &adapter->hw;
+
+ /* reclaim resources allocated to VFs */
+ if (adapter->vf_data) {
+ if (!pci_vfs_assigned(pdev)) {
+ /*
+ * disable iov and allow time for transactions to
+ * clear
+ */
+ pci_disable_sriov(pdev);
+ msleep(500);
+
+ dev_info(pci_dev_to_dev(pdev), "IOV Disabled\n");
+ } else {
+ dev_info(pci_dev_to_dev(pdev), "IOV Not Disabled\n "
+ "VF(s) are assigned to guests!\n");
+ }
+ /* Disable Malicious Driver Detection */
+ igb_disable_mdd(adapter);
+
+ /* free vf data storage */
+ kfree(adapter->vf_data);
+ adapter->vf_data = NULL;
+
+ /* switch rings back to PF ownership */
+ E1000_WRITE_REG(hw, E1000_IOVCTL,
+ E1000_IOVCTL_REUSE_VFQ);
+ E1000_WRITE_FLUSH(hw);
+ msleep(100);
+ }
+
+ adapter->vfs_allocated_count = 0;
+}
+
+/**
+ * igb_set_sriov_capability - setup SR-IOV if supported
+ *
+ * Attempt to enable single root IO virtualization capabilites present in the
+ * kernel.
+ **/
+static void igb_set_sriov_capability(struct igb_adapter *adapter)
+{
+ struct pci_dev *pdev = adapter->pdev;
+ int old_vfs = 0;
+ int i;
+
+ old_vfs = pci_num_vf(pdev);
+ if (old_vfs) {
+ dev_info(pci_dev_to_dev(pdev),
+ "%d pre-allocated VFs found - override "
+ "max_vfs setting of %d\n", old_vfs,
+ adapter->vfs_allocated_count);
+ adapter->vfs_allocated_count = old_vfs;
+ }
+ /* no VFs requested, do nothing */
+ if (!adapter->vfs_allocated_count)
+ return;
+
+ /* allocate vf data storage */
+ adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
+ sizeof(struct vf_data_storage),
+ GFP_KERNEL);
+
+ if (adapter->vf_data) {
+ if (!old_vfs) {
+ if (pci_enable_sriov(pdev,
+ adapter->vfs_allocated_count))
+ goto err_out;
+ }
+ for (i = 0; i < adapter->vfs_allocated_count; i++)
+ igb_vf_configure(adapter, i);
+
+ switch (adapter->hw.mac.type) {
+ case e1000_82576:
+ case e1000_i350:
+ /* Enable VM to VM loopback by default */
+ adapter->flags |= IGB_FLAG_LOOPBACK_ENABLE;
+ break;
+ default:
+ /* Currently no other hardware supports loopback */
+ break;
+ }
+
+ /* DMA Coalescing is not supported in IOV mode. */
+ if (adapter->hw.mac.type >= e1000_i350)
+ adapter->dmac = IGB_DMAC_DISABLE;
+ if (adapter->hw.mac.type < e1000_i350)
+ adapter->flags |= IGB_FLAG_DETECT_BAD_DMA;
+ return;
+
+ }
+
+err_out:
+ kfree(adapter->vf_data);
+ adapter->vf_data = NULL;
+ adapter->vfs_allocated_count = 0;
+ dev_warn(pci_dev_to_dev(pdev),
+ "Failed to initialize SR-IOV virtualization\n");
+}
+
+/**
+ * igb_set_interrupt_capability - set MSI or MSI-X if supported
+ *
+ * Attempt to configure interrupts using the best available
+ * capabilities of the hardware and kernel.
+ **/
+static void igb_set_interrupt_capability(struct igb_adapter *adapter, bool msix)
+{
+ struct pci_dev *pdev = adapter->pdev;
+ int err;
+ int numvecs, i;
+
+ if (!msix)
+ adapter->int_mode = IGB_INT_MODE_MSI;
+
+ /* Number of supported queues. */
+ adapter->num_rx_queues = adapter->rss_queues;
+
+ if (adapter->vmdq_pools > 1)
+ adapter->num_rx_queues += adapter->vmdq_pools - 1;
+
+#ifdef HAVE_TX_MQ
+ if (adapter->vmdq_pools)
+ adapter->num_tx_queues = adapter->vmdq_pools;
+ else
+ adapter->num_tx_queues = adapter->num_rx_queues;
+#else
+ adapter->num_tx_queues = max_t(u32, 1, adapter->vmdq_pools);
+#endif
+
+ switch (adapter->int_mode) {
+ case IGB_INT_MODE_MSIX:
+ /* start with one vector for every rx queue */
+ numvecs = adapter->num_rx_queues;
+
+ /* if tx handler is separate add 1 for every tx queue */
+ if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
+ numvecs += adapter->num_tx_queues;
+
+ /* store the number of vectors reserved for queues */
+ adapter->num_q_vectors = numvecs;
+
+ /* add 1 vector for link status interrupts */
+ numvecs++;
+ adapter->msix_entries = kcalloc(numvecs,
+ sizeof(struct msix_entry),
+ GFP_KERNEL);
+ if (adapter->msix_entries) {
+ for (i = 0; i < numvecs; i++)
+ adapter->msix_entries[i].entry = i;
+
+ err = pci_enable_msix(pdev,
+ adapter->msix_entries, numvecs);
+ if (err == 0)
+ break;
+ }
+ /* MSI-X failed, so fall through and try MSI */
+ dev_warn(pci_dev_to_dev(pdev), "Failed to initialize MSI-X interrupts. "
+ "Falling back to MSI interrupts.\n");
+ igb_reset_interrupt_capability(adapter);
+ case IGB_INT_MODE_MSI:
+ if (!pci_enable_msi(pdev))
+ adapter->flags |= IGB_FLAG_HAS_MSI;
+ else
+ dev_warn(pci_dev_to_dev(pdev), "Failed to initialize MSI "
+ "interrupts. Falling back to legacy "
+ "interrupts.\n");
+ /* Fall through */
+ case IGB_INT_MODE_LEGACY:
+ /* disable advanced features and set number of queues to 1 */
+ igb_reset_sriov_capability(adapter);
+ adapter->vmdq_pools = 0;
+ adapter->rss_queues = 1;
+ adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
+ adapter->num_rx_queues = 1;
+ adapter->num_tx_queues = 1;
+ adapter->num_q_vectors = 1;
+ /* Don't do anything; this is system default */
+ break;
+ }
+}
+
+static void igb_add_ring(struct igb_ring *ring,
+ struct igb_ring_container *head)
+{
+ head->ring = ring;
+ head->count++;
+}
+
+/**
+ * igb_alloc_q_vector - Allocate memory for a single interrupt vector
+ * @adapter: board private structure to initialize
+ * @v_count: q_vectors allocated on adapter, used for ring interleaving
+ * @v_idx: index of vector in adapter struct
+ * @txr_count: total number of Tx rings to allocate
+ * @txr_idx: index of first Tx ring to allocate
+ * @rxr_count: total number of Rx rings to allocate
+ * @rxr_idx: index of first Rx ring to allocate
+ *
+ * We allocate one q_vector. If allocation fails we return -ENOMEM.
+ **/
+static int igb_alloc_q_vector(struct igb_adapter *adapter,
+ unsigned int v_count, unsigned int v_idx,
+ unsigned int txr_count, unsigned int txr_idx,
+ unsigned int rxr_count, unsigned int rxr_idx)
+{
+ struct igb_q_vector *q_vector;
+ struct igb_ring *ring;
+ int ring_count, size;
+
+ /* igb only supports 1 Tx and/or 1 Rx queue per vector */
+ if (txr_count > 1 || rxr_count > 1)
+ return -ENOMEM;
+
+ ring_count = txr_count + rxr_count;
+ size = sizeof(struct igb_q_vector) +
+ (sizeof(struct igb_ring) * ring_count);
+
+ /* allocate q_vector and rings */
+ q_vector = kzalloc(size, GFP_KERNEL);
+ if (!q_vector)
+ return -ENOMEM;
+
+#ifndef IGB_NO_LRO
+ /* initialize LRO */
+ __skb_queue_head_init(&q_vector->lrolist.active);
+
+#endif
+ /* initialize NAPI */
+ netif_napi_add(adapter->netdev, &q_vector->napi,
+ igb_poll, 64);
+
+ /* tie q_vector and adapter together */
+ adapter->q_vector[v_idx] = q_vector;
+ q_vector->adapter = adapter;
+
+ /* initialize work limits */
+ q_vector->tx.work_limit = adapter->tx_work_limit;
+
+ /* initialize ITR configuration */
+ q_vector->itr_register = adapter->hw.hw_addr + E1000_EITR(0);
+ q_vector->itr_val = IGB_START_ITR;
+
+ /* initialize pointer to rings */
+ ring = q_vector->ring;
+
+ /* intialize ITR */
+ if (rxr_count) {
+ /* rx or rx/tx vector */
+ if (!adapter->rx_itr_setting || adapter->rx_itr_setting > 3)
+ q_vector->itr_val = adapter->rx_itr_setting;
+ } else {
+ /* tx only vector */
+ if (!adapter->tx_itr_setting || adapter->tx_itr_setting > 3)
+ q_vector->itr_val = adapter->tx_itr_setting;
+ }
+
+ if (txr_count) {
+ /* assign generic ring traits */
+ ring->dev = &adapter->pdev->dev;
+ ring->netdev = adapter->netdev;
+
+ /* configure backlink on ring */
+ ring->q_vector = q_vector;
+
+ /* update q_vector Tx values */
+ igb_add_ring(ring, &q_vector->tx);
+
+ /* For 82575, context index must be unique per ring. */
+ if (adapter->hw.mac.type == e1000_82575)
+ set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);
+
+ /* apply Tx specific ring traits */
+ ring->count = adapter->tx_ring_count;
+ ring->queue_index = txr_idx;
+
+ /* assign ring to adapter */
+ adapter->tx_ring[txr_idx] = ring;
+
+ /* push pointer to next ring */
+ ring++;
+ }
+
+ if (rxr_count) {
+ /* assign generic ring traits */
+ ring->dev = &adapter->pdev->dev;
+ ring->netdev = adapter->netdev;
+
+ /* configure backlink on ring */
+ ring->q_vector = q_vector;
+
+ /* update q_vector Rx values */
+ igb_add_ring(ring, &q_vector->rx);
+
+#ifndef HAVE_NDO_SET_FEATURES
+ /* enable rx checksum */
+ set_bit(IGB_RING_FLAG_RX_CSUM, &ring->flags);
+
+#endif
+ /* set flag indicating ring supports SCTP checksum offload */
+ if (adapter->hw.mac.type >= e1000_82576)
+ set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);
+
+ if ((adapter->hw.mac.type == e1000_i350) ||
+ (adapter->hw.mac.type == e1000_i354))
+ set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags);
+
+ /* apply Rx specific ring traits */
+ ring->count = adapter->rx_ring_count;
+ ring->queue_index = rxr_idx;
+
+ /* assign ring to adapter */
+ adapter->rx_ring[rxr_idx] = ring;
+ }
+
+ return 0;
+}
+
+/**
+ * igb_alloc_q_vectors - Allocate memory for interrupt vectors
+ * @adapter: board private structure to initialize
+ *
+ * We allocate one q_vector per queue interrupt. If allocation fails we
+ * return -ENOMEM.
+ **/
+static int igb_alloc_q_vectors(struct igb_adapter *adapter)
+{
+ int q_vectors = adapter->num_q_vectors;
+ int rxr_remaining = adapter->num_rx_queues;
+ int txr_remaining = adapter->num_tx_queues;
+ int rxr_idx = 0, txr_idx = 0, v_idx = 0;
+ int err;
+
+ if (q_vectors >= (rxr_remaining + txr_remaining)) {
+ for (; rxr_remaining; v_idx++) {
+ err = igb_alloc_q_vector(adapter, q_vectors, v_idx,
+ 0, 0, 1, rxr_idx);
+
+ if (err)
+ goto err_out;
+
+ /* update counts and index */
+ rxr_remaining--;
+ rxr_idx++;
+ }
+ }
+
+ for (; v_idx < q_vectors; v_idx++) {
+ int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx);
+ int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx);
+ err = igb_alloc_q_vector(adapter, q_vectors, v_idx,
+ tqpv, txr_idx, rqpv, rxr_idx);
+
+ if (err)
+ goto err_out;
+
+ /* update counts and index */
+ rxr_remaining -= rqpv;
+ txr_remaining -= tqpv;
+ rxr_idx++;
+ txr_idx++;
+ }
+
+ return 0;
+
+err_out:
+ adapter->num_tx_queues = 0;
+ adapter->num_rx_queues = 0;
+ adapter->num_q_vectors = 0;
+
+ while (v_idx--)
+ igb_free_q_vector(adapter, v_idx);
+
+ return -ENOMEM;
+}
+
+/**
+ * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
+ *
+ * This function initializes the interrupts and allocates all of the queues.
+ **/
+static int igb_init_interrupt_scheme(struct igb_adapter *adapter, bool msix)
+{
+ struct pci_dev *pdev = adapter->pdev;
+ int err;
+
+ igb_set_interrupt_capability(adapter, msix);
+
+ err = igb_alloc_q_vectors(adapter);
+ if (err) {
+ dev_err(pci_dev_to_dev(pdev), "Unable to allocate memory for vectors\n");
+ goto err_alloc_q_vectors;
+ }
+
+ igb_cache_ring_register(adapter);
+
+ return 0;
+
+err_alloc_q_vectors:
+ igb_reset_interrupt_capability(adapter);
+ return err;
+}
+
+/**
+ * igb_request_irq - initialize interrupts
+ *
+ * Attempts to configure interrupts using the best available
+ * capabilities of the hardware and kernel.
+ **/
+static int igb_request_irq(struct igb_adapter *adapter)
+{
+ struct net_device *netdev = adapter->netdev;
+ struct pci_dev *pdev = adapter->pdev;
+ int err = 0;
+
+ if (adapter->msix_entries) {
+ err = igb_request_msix(adapter);
+ if (!err)
+ goto request_done;
+ /* fall back to MSI */
+ igb_free_all_tx_resources(adapter);
+ igb_free_all_rx_resources(adapter);
+
+ igb_clear_interrupt_scheme(adapter);
+ igb_reset_sriov_capability(adapter);
+ err = igb_init_interrupt_scheme(adapter, false);
+ if (err)
+ goto request_done;
+ igb_setup_all_tx_resources(adapter);
+ igb_setup_all_rx_resources(adapter);
+ igb_configure(adapter);
+ }
+
+ igb_assign_vector(adapter->q_vector[0], 0);
+
+ if (adapter->flags & IGB_FLAG_HAS_MSI) {
+ err = request_irq(pdev->irq, &igb_intr_msi, 0,
+ netdev->name, adapter);
+ if (!err)
+ goto request_done;
+
+ /* fall back to legacy interrupts */
+ igb_reset_interrupt_capability(adapter);
+ adapter->flags &= ~IGB_FLAG_HAS_MSI;
+ }
+
+ err = request_irq(pdev->irq, &igb_intr, IRQF_SHARED,
+ netdev->name, adapter);
+
+ if (err)
+ dev_err(pci_dev_to_dev(pdev), "Error %d getting interrupt\n",
+ err);
+
+request_done:
+ return err;
+}
+
+static void igb_free_irq(struct igb_adapter *adapter)
+{
+ if (adapter->msix_entries) {
+ int vector = 0, i;
+
+ free_irq(adapter->msix_entries[vector++].vector, adapter);
+
+ for (i = 0; i < adapter->num_q_vectors; i++)
+ free_irq(adapter->msix_entries[vector++].vector,
+ adapter->q_vector[i]);
+ } else {
+ free_irq(adapter->pdev->irq, adapter);
+ }
+}
+
+/**
+ * igb_irq_disable - Mask off interrupt generation on the NIC
+ * @adapter: board private structure
+ **/
+static void igb_irq_disable(struct igb_adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+
+ /*
+ * we need to be careful when disabling interrupts. The VFs are also
+ * mapped into these registers and so clearing the bits can cause
+ * issues on the VF drivers so we only need to clear what we set
+ */
+ if (adapter->msix_entries) {
+ u32 regval = E1000_READ_REG(hw, E1000_EIAM);
+ E1000_WRITE_REG(hw, E1000_EIAM, regval & ~adapter->eims_enable_mask);
+ E1000_WRITE_REG(hw, E1000_EIMC, adapter->eims_enable_mask);
+ regval = E1000_READ_REG(hw, E1000_EIAC);
+ E1000_WRITE_REG(hw, E1000_EIAC, regval & ~adapter->eims_enable_mask);
+ }
+
+ E1000_WRITE_REG(hw, E1000_IAM, 0);
+ E1000_WRITE_REG(hw, E1000_IMC, ~0);
+ E1000_WRITE_FLUSH(hw);
+
+ if (adapter->msix_entries) {
+ int vector = 0, i;
+
+ synchronize_irq(adapter->msix_entries[vector++].vector);
+
+ for (i = 0; i < adapter->num_q_vectors; i++)
+ synchronize_irq(adapter->msix_entries[vector++].vector);
+ } else {
+ synchronize_irq(adapter->pdev->irq);
+ }
+}
+
+/**
+ * igb_irq_enable - Enable default interrupt generation settings
+ * @adapter: board private structure
+ **/
+static void igb_irq_enable(struct igb_adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+
+ if (adapter->msix_entries) {
+ u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA;
+ u32 regval = E1000_READ_REG(hw, E1000_EIAC);
+ E1000_WRITE_REG(hw, E1000_EIAC, regval | adapter->eims_enable_mask);
+ regval = E1000_READ_REG(hw, E1000_EIAM);
+ E1000_WRITE_REG(hw, E1000_EIAM, regval | adapter->eims_enable_mask);
+ E1000_WRITE_REG(hw, E1000_EIMS, adapter->eims_enable_mask);
+ if (adapter->vfs_allocated_count) {
+ E1000_WRITE_REG(hw, E1000_MBVFIMR, 0xFF);
+ ims |= E1000_IMS_VMMB;
+ if (adapter->mdd)
+ if ((adapter->hw.mac.type == e1000_i350) ||
+ (adapter->hw.mac.type == e1000_i354))
+ ims |= E1000_IMS_MDDET;
+ }
+ E1000_WRITE_REG(hw, E1000_IMS, ims);
+ } else {
+ E1000_WRITE_REG(hw, E1000_IMS, IMS_ENABLE_MASK |
+ E1000_IMS_DRSTA);
+ E1000_WRITE_REG(hw, E1000_IAM, IMS_ENABLE_MASK |
+ E1000_IMS_DRSTA);
+ }
+}
+
+static void igb_update_mng_vlan(struct igb_adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ u16 vid = adapter->hw.mng_cookie.vlan_id;
+ u16 old_vid = adapter->mng_vlan_id;
+
+ if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
+ /* add VID to filter table */
+ igb_vfta_set(adapter, vid, TRUE);
+ adapter->mng_vlan_id = vid;
+ } else {
+ adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
+ }
+
+ if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
+ (vid != old_vid) &&
+#ifdef HAVE_VLAN_RX_REGISTER
+ !vlan_group_get_device(adapter->vlgrp, old_vid)) {
+#else
+ !test_bit(old_vid, adapter->active_vlans)) {
+#endif
+ /* remove VID from filter table */
+ igb_vfta_set(adapter, old_vid, FALSE);
+ }
+}
+
+/**
+ * igb_release_hw_control - release control of the h/w to f/w
+ * @adapter: address of board private structure
+ *
+ * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
+ * For ASF and Pass Through versions of f/w this means that the
+ * driver is no longer loaded.
+ *
+ **/
+static void igb_release_hw_control(struct igb_adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ u32 ctrl_ext;
+
+ /* Let firmware take over control of h/w */
+ ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
+ E1000_WRITE_REG(hw, E1000_CTRL_EXT,
+ ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
+}
+
+/**
+ * igb_get_hw_control - get control of the h/w from f/w
+ * @adapter: address of board private structure
+ *
+ * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
+ * For ASF and Pass Through versions of f/w this means that
+ * the driver is loaded.
+ *
+ **/
+static void igb_get_hw_control(struct igb_adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ u32 ctrl_ext;
+
+ /* Let firmware know the driver has taken over */
+ ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
+ E1000_WRITE_REG(hw, E1000_CTRL_EXT,
+ ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
+}
+
+/**
+ * igb_configure - configure the hardware for RX and TX
+ * @adapter: private board structure
+ **/
+static void igb_configure(struct igb_adapter *adapter)
+{
+ struct net_device *netdev = adapter->netdev;
+ int i;
+
+ igb_get_hw_control(adapter);
+ igb_set_rx_mode(netdev);
+
+ igb_restore_vlan(adapter);
+
+ igb_setup_tctl(adapter);
+ igb_setup_mrqc(adapter);
+ igb_setup_rctl(adapter);
+
+ igb_configure_tx(adapter);
+ igb_configure_rx(adapter);
+
+ e1000_rx_fifo_flush_82575(&adapter->hw);
+#ifdef CONFIG_NETDEVICES_MULTIQUEUE
+ if (adapter->num_tx_queues > 1)
+ netdev->features |= NETIF_F_MULTI_QUEUE;
+ else
+ netdev->features &= ~NETIF_F_MULTI_QUEUE;
+#endif
+
+ /* call igb_desc_unused which always leaves
+ * at least 1 descriptor unused to make sure
+ * next_to_use != next_to_clean */
+ for (i = 0; i < adapter->num_rx_queues; i++) {
+ struct igb_ring *ring = adapter->rx_ring[i];
+ igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
+ }
+}
+
+/**
+ * igb_power_up_link - Power up the phy/serdes link
+ * @adapter: address of board private structure
+ **/
+void igb_power_up_link(struct igb_adapter *adapter)
+{
+ e1000_phy_hw_reset(&adapter->hw);
+
+ if (adapter->hw.phy.media_type == e1000_media_type_copper)
+ e1000_power_up_phy(&adapter->hw);
+ else
+ e1000_power_up_fiber_serdes_link(&adapter->hw);
+}
+
+/**
+ * igb_power_down_link - Power down the phy/serdes link
+ * @adapter: address of board private structure
+ */
+static void igb_power_down_link(struct igb_adapter *adapter)
+{
+ if (adapter->hw.phy.media_type == e1000_media_type_copper)
+ e1000_power_down_phy(&adapter->hw);
+ else
+ e1000_shutdown_fiber_serdes_link(&adapter->hw);
+}
+
+/* Detect and switch function for Media Auto Sense */
+static void igb_check_swap_media(struct igb_adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ u32 ctrl_ext, connsw;
+ bool swap_now = false;
+ bool link;
+
+ ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
+ connsw = E1000_READ_REG(hw, E1000_CONNSW);
+ link = igb_has_link(adapter);
+
+ /* need to live swap if current media is copper and we have fiber/serdes
+ * to go to.
+ */
+
+ if ((hw->phy.media_type == e1000_media_type_copper) &&
+ (!(connsw & E1000_CONNSW_AUTOSENSE_EN))) {
+ swap_now = true;
+ } else if (!(connsw & E1000_CONNSW_SERDESD)) {
+ /* copper signal takes time to appear */
+ if (adapter->copper_tries < 2) {
+ adapter->copper_tries++;
+ connsw |= E1000_CONNSW_AUTOSENSE_CONF;
+ E1000_WRITE_REG(hw, E1000_CONNSW, connsw);
+ return;
+ } else {
+ adapter->copper_tries = 0;
+ if ((connsw & E1000_CONNSW_PHYSD) &&
+ (!(connsw & E1000_CONNSW_PHY_PDN))) {
+ swap_now = true;
+ connsw &= ~E1000_CONNSW_AUTOSENSE_CONF;
+ E1000_WRITE_REG(hw, E1000_CONNSW, connsw);
+ }
+ }
+ }
+
+ if (swap_now) {
+ switch (hw->phy.media_type) {
+ case e1000_media_type_copper:
+ dev_info(pci_dev_to_dev(adapter->pdev),
+ "%s:MAS: changing media to fiber/serdes\n",
+ adapter->netdev->name);
+ ctrl_ext |=
+ E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES;
+ adapter->flags |= IGB_FLAG_MEDIA_RESET;
+ adapter->copper_tries = 0;
+ break;
+ case e1000_media_type_internal_serdes:
+ case e1000_media_type_fiber:
+ dev_info(pci_dev_to_dev(adapter->pdev),
+ "%s:MAS: changing media to copper\n",
+ adapter->netdev->name);
+ ctrl_ext &=
+ ~E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES;
+ adapter->flags |= IGB_FLAG_MEDIA_RESET;
+ break;
+ default:
+ /* shouldn't get here during regular operation */
+ dev_err(pci_dev_to_dev(adapter->pdev),
+ "%s:AMS: Invalid media type found, returning\n",
+ adapter->netdev->name);
+ break;
+ }
+ E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext);
+ }
+}
+
+#ifdef HAVE_I2C_SUPPORT
+/* igb_get_i2c_data - Reads the I2C SDA data bit
+ * @hw: pointer to hardware structure
+ * @i2cctl: Current value of I2CCTL register
+ *
+ * Returns the I2C data bit value
+ */
+static int igb_get_i2c_data(void *data)
+{
+ struct igb_adapter *adapter = (struct igb_adapter *)data;
+ struct e1000_hw *hw = &adapter->hw;
+ s32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS);
+
+ return (i2cctl & E1000_I2C_DATA_IN) != 0;
+}
+
+/* igb_set_i2c_data - Sets the I2C data bit
+ * @data: pointer to hardware structure
+ * @state: I2C data value (0 or 1) to set
+ *
+ * Sets the I2C data bit
+ */
+static void igb_set_i2c_data(void *data, int state)
+{
+ struct igb_adapter *adapter = (struct igb_adapter *)data;
+ struct e1000_hw *hw = &adapter->hw;
+ s32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS);
+
+ if (state)
+ i2cctl |= E1000_I2C_DATA_OUT;
+ else
+ i2cctl &= ~E1000_I2C_DATA_OUT;
+
+ i2cctl &= ~E1000_I2C_DATA_OE_N;
+ i2cctl |= E1000_I2C_CLK_OE_N;
+
+ E1000_WRITE_REG(hw, E1000_I2CPARAMS, i2cctl);
+ E1000_WRITE_FLUSH(hw);
+
+}
+
+/* igb_set_i2c_clk - Sets the I2C SCL clock
+ * @data: pointer to hardware structure
+ * @state: state to set clock
+ *
+ * Sets the I2C clock line to state
+ */
+static void igb_set_i2c_clk(void *data, int state)
+{
+ struct igb_adapter *adapter = (struct igb_adapter *)data;
+ struct e1000_hw *hw = &adapter->hw;
+ s32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS);
+
+ if (state) {
+ i2cctl |= E1000_I2C_CLK_OUT;
+ i2cctl &= ~E1000_I2C_CLK_OE_N;
+ } else {
+ i2cctl &= ~E1000_I2C_CLK_OUT;
+ i2cctl &= ~E1000_I2C_CLK_OE_N;
+ }
+ E1000_WRITE_REG(hw, E1000_I2CPARAMS, i2cctl);
+ E1000_WRITE_FLUSH(hw);
+}
+
+/* igb_get_i2c_clk - Gets the I2C SCL clock state
+ * @data: pointer to hardware structure
+ *
+ * Gets the I2C clock state
+ */
+static int igb_get_i2c_clk(void *data)
+{
+ struct igb_adapter *adapter = (struct igb_adapter *)data;
+ struct e1000_hw *hw = &adapter->hw;
+ s32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS);
+
+ return (i2cctl & E1000_I2C_CLK_IN) != 0;
+}
+
+static const struct i2c_algo_bit_data igb_i2c_algo = {
+ .setsda = igb_set_i2c_data,
+ .setscl = igb_set_i2c_clk,
+ .getsda = igb_get_i2c_data,
+ .getscl = igb_get_i2c_clk,
+ .udelay = 5,
+ .timeout = 20,
+};
+
+/* igb_init_i2c - Init I2C interface
+ * @adapter: pointer to adapter structure
+ *
+ */
+static s32 igb_init_i2c(struct igb_adapter *adapter)
+{
+ s32 status = E1000_SUCCESS;
+
+ /* I2C interface supported on i350 devices */
+ if (adapter->hw.mac.type != e1000_i350)
+ return E1000_SUCCESS;
+
+ /* Initialize the i2c bus which is controlled by the registers.
+ * This bus will use the i2c_algo_bit structue that implements
+ * the protocol through toggling of the 4 bits in the register.
+ */
+ adapter->i2c_adap.owner = THIS_MODULE;
+ adapter->i2c_algo = igb_i2c_algo;
+ adapter->i2c_algo.data = adapter;
+ adapter->i2c_adap.algo_data = &adapter->i2c_algo;
+ adapter->i2c_adap.dev.parent = &adapter->pdev->dev;
+ strlcpy(adapter->i2c_adap.name, "igb BB",
+ sizeof(adapter->i2c_adap.name));
+ status = i2c_bit_add_bus(&adapter->i2c_adap);
+ return status;
+}
+
+#endif /* HAVE_I2C_SUPPORT */
+/**
+ * igb_up - Open the interface and prepare it to handle traffic
+ * @adapter: board private structure
+ **/
+int igb_up(struct igb_adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ int i;
+
+ /* hardware has been reset, we need to reload some things */
+ igb_configure(adapter);
+
+ clear_bit(__IGB_DOWN, &adapter->state);
+
+ for (i = 0; i < adapter->num_q_vectors; i++)
+ napi_enable(&(adapter->q_vector[i]->napi));
+
+ if (adapter->msix_entries)
+ igb_configure_msix(adapter);
+ else
+ igb_assign_vector(adapter->q_vector[0], 0);
+
+ igb_configure_lli(adapter);
+
+ /* Clear any pending interrupts. */
+ E1000_READ_REG(hw, E1000_ICR);
+ igb_irq_enable(adapter);
+
+ /* notify VFs that reset has been completed */
+ if (adapter->vfs_allocated_count) {
+ u32 reg_data = E1000_READ_REG(hw, E1000_CTRL_EXT);
+ reg_data |= E1000_CTRL_EXT_PFRSTD;
+ E1000_WRITE_REG(hw, E1000_CTRL_EXT, reg_data);
+ }
+
+ netif_tx_start_all_queues(adapter->netdev);
+
+ if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA)
+ schedule_work(&adapter->dma_err_task);
+ /* start the watchdog. */
+ hw->mac.get_link_status = 1;
+ schedule_work(&adapter->watchdog_task);
+
+ if ((adapter->flags & IGB_FLAG_EEE) &&
+ (!hw->dev_spec._82575.eee_disable))
+ adapter->eee_advert = MDIO_EEE_100TX | MDIO_EEE_1000T;
+
+ return 0;
+}
+
+void igb_down(struct igb_adapter *adapter)
+{
+ struct net_device *netdev = adapter->netdev;
+ struct e1000_hw *hw = &adapter->hw;
+ u32 tctl, rctl;
+ int i;
+
+ /* signal that we're down so the interrupt handler does not
+ * reschedule our watchdog timer */
+ set_bit(__IGB_DOWN, &adapter->state);
+
+ /* disable receives in the hardware */
+ rctl = E1000_READ_REG(hw, E1000_RCTL);
+ E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
+ /* flush and sleep below */
+
+ netif_tx_stop_all_queues(netdev);
+
+ /* disable transmits in the hardware */
+ tctl = E1000_READ_REG(hw, E1000_TCTL);
+ tctl &= ~E1000_TCTL_EN;
+ E1000_WRITE_REG(hw, E1000_TCTL, tctl);
+ /* flush both disables and wait for them to finish */
+ E1000_WRITE_FLUSH(hw);
+ usleep_range(10000, 20000);
+
+ for (i = 0; i < adapter->num_q_vectors; i++)
+ napi_disable(&(adapter->q_vector[i]->napi));
+
+ igb_irq_disable(adapter);
+
+ adapter->flags &= ~IGB_FLAG_NEED_LINK_UPDATE;
+
+ del_timer_sync(&adapter->watchdog_timer);
+ if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA)
+ del_timer_sync(&adapter->dma_err_timer);
+ del_timer_sync(&adapter->phy_info_timer);
+
+ netif_carrier_off(netdev);
+
+ /* record the stats before reset*/
+ igb_update_stats(adapter);
+
+ adapter->link_speed = 0;
+ adapter->link_duplex = 0;
+
+#ifdef HAVE_PCI_ERS
+ if (!pci_channel_offline(adapter->pdev))
+ igb_reset(adapter);
+#else
+ igb_reset(adapter);
+#endif
+ igb_clean_all_tx_rings(adapter);
+ igb_clean_all_rx_rings(adapter);
+#ifdef IGB_DCA
+ /* since we reset the hardware DCA settings were cleared */
+ igb_setup_dca(adapter);
+#endif
+}
+
+void igb_reinit_locked(struct igb_adapter *adapter)
+{
+ WARN_ON(in_interrupt());
+ while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
+ usleep_range(1000, 2000);
+ igb_down(adapter);
+ igb_up(adapter);
+ clear_bit(__IGB_RESETTING, &adapter->state);
+}
+
+/**
+ * igb_enable_mas - Media Autosense re-enable after swap
+ *
+ * @adapter: adapter struct
+ **/
+static s32 igb_enable_mas(struct igb_adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ u32 connsw;
+ s32 ret_val = E1000_SUCCESS;
+
+ connsw = E1000_READ_REG(hw, E1000_CONNSW);
+ if (hw->phy.media_type == e1000_media_type_copper) {
+ /* configure for SerDes media detect */
+ if (!(connsw & E1000_CONNSW_SERDESD)) {
+ connsw |= E1000_CONNSW_ENRGSRC;
+ connsw |= E1000_CONNSW_AUTOSENSE_EN;
+ E1000_WRITE_REG(hw, E1000_CONNSW, connsw);
+ E1000_WRITE_FLUSH(hw);
+ } else if (connsw & E1000_CONNSW_SERDESD) {
+ /* already SerDes, no need to enable anything */
+ return ret_val;
+ } else {
+ dev_info(pci_dev_to_dev(adapter->pdev),
+ "%s:MAS: Unable to configure feature, disabling..\n",
+ adapter->netdev->name);
+ adapter->flags &= ~IGB_FLAG_MAS_ENABLE;
+ }
+ }
+ return ret_val;
+}
+
+void igb_reset(struct igb_adapter *adapter)
+{
+ struct pci_dev *pdev = adapter->pdev;
+ struct e1000_hw *hw = &adapter->hw;
+ struct e1000_mac_info *mac = &hw->mac;
+ struct e1000_fc_info *fc = &hw->fc;
+ u32 pba = 0, tx_space, min_tx_space, min_rx_space, hwm;
+
+ /* Repartition Pba for greater than 9k mtu
+ * To take effect CTRL.RST is required.
+ */
+ switch (mac->type) {
+ case e1000_i350:
+ case e1000_82580:
+ case e1000_i354:
+ pba = E1000_READ_REG(hw, E1000_RXPBS);
+ pba = e1000_rxpbs_adjust_82580(pba);
+ break;
+ case e1000_82576:
+ pba = E1000_READ_REG(hw, E1000_RXPBS);
+ pba &= E1000_RXPBS_SIZE_MASK_82576;
+ break;
+ case e1000_82575:
+ case e1000_i210:
+ case e1000_i211:
+ default:
+ pba = E1000_PBA_34K;
+ break;
+ }
+
+ if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
+ (mac->type < e1000_82576)) {
+ /* adjust PBA for jumbo frames */
+ E1000_WRITE_REG(hw, E1000_PBA, pba);
+
+ /* To maintain wire speed transmits, the Tx FIFO should be
+ * large enough to accommodate two full transmit packets,
+ * rounded up to the next 1KB and expressed in KB. Likewise,
+ * the Rx FIFO should be large enough to accommodate at least
+ * one full receive packet and is similarly rounded up and
+ * expressed in KB. */
+ pba = E1000_READ_REG(hw, E1000_PBA);
+ /* upper 16 bits has Tx packet buffer allocation size in KB */
+ tx_space = pba >> 16;
+ /* lower 16 bits has Rx packet buffer allocation size in KB */
+ pba &= 0xffff;
+ /* the tx fifo also stores 16 bytes of information about the tx
+ * but don't include ethernet FCS because hardware appends it */
+ min_tx_space = (adapter->max_frame_size +
+ sizeof(union e1000_adv_tx_desc) -
+ ETH_FCS_LEN) * 2;
+ min_tx_space = ALIGN(min_tx_space, 1024);
+ min_tx_space >>= 10;
+ /* software strips receive CRC, so leave room for it */
+ min_rx_space = adapter->max_frame_size;
+ min_rx_space = ALIGN(min_rx_space, 1024);
+ min_rx_space >>= 10;
+
+ /* If current Tx allocation is less than the min Tx FIFO size,
+ * and the min Tx FIFO size is less than the current Rx FIFO
+ * allocation, take space away from current Rx allocation */
+ if (tx_space < min_tx_space &&
+ ((min_tx_space - tx_space) < pba)) {
+ pba = pba - (min_tx_space - tx_space);
+
+ /* if short on rx space, rx wins and must trump tx
+ * adjustment */
+ if (pba < min_rx_space)
+ pba = min_rx_space;
+ }
+ E1000_WRITE_REG(hw, E1000_PBA, pba);
+ }
+
+ /* flow control settings */
+ /* The high water mark must be low enough to fit one full frame
+ * (or the size used for early receive) above it in the Rx FIFO.
+ * Set it to the lower of:
+ * - 90% of the Rx FIFO size, or
+ * - the full Rx FIFO size minus one full frame */
+ hwm = min(((pba << 10) * 9 / 10),
+ ((pba << 10) - 2 * adapter->max_frame_size));
+
+ fc->high_water = hwm & 0xFFFFFFF0; /* 16-byte granularity */
+ fc->low_water = fc->high_water - 16;
+ fc->pause_time = 0xFFFF;
+ fc->send_xon = 1;
+ fc->current_mode = fc->requested_mode;
+
+ /* disable receive for all VFs and wait one second */
+ if (adapter->vfs_allocated_count) {
+ int i;
+ /*
+ * Clear all flags except indication that the PF has set
+ * the VF MAC addresses administratively
+ */
+ for (i = 0 ; i < adapter->vfs_allocated_count; i++)
+ adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
+
+ /* ping all the active vfs to let them know we are going down */
+ igb_ping_all_vfs(adapter);
+
+ /* disable transmits and receives */
+ E1000_WRITE_REG(hw, E1000_VFRE, 0);
+ E1000_WRITE_REG(hw, E1000_VFTE, 0);
+ }
+
+ /* Allow time for pending master requests to run */
+ e1000_reset_hw(hw);
+ E1000_WRITE_REG(hw, E1000_WUC, 0);
+
+ if (adapter->flags & IGB_FLAG_MEDIA_RESET) {
+ e1000_setup_init_funcs(hw, TRUE);
+ igb_check_options(adapter);
+ e1000_get_bus_info(hw);
+ adapter->flags &= ~IGB_FLAG_MEDIA_RESET;
+ }
+ if (adapter->flags & IGB_FLAG_MAS_ENABLE) {
+ if (igb_enable_mas(adapter))
+ dev_err(pci_dev_to_dev(pdev),
+ "Error enabling Media Auto Sense\n");
+ }
+ if (e1000_init_hw(hw))
+ dev_err(pci_dev_to_dev(pdev), "Hardware Error\n");
+
+ /*
+ * Flow control settings reset on hardware reset, so guarantee flow
+ * control is off when forcing speed.
+ */
+ if (!hw->mac.autoneg)
+ e1000_force_mac_fc(hw);
+
+ igb_init_dmac(adapter, pba);
+ /* Re-initialize the thermal sensor on i350 devices. */
+ if (mac->type == e1000_i350 && hw->bus.func == 0) {
+ /*
+ * If present, re-initialize the external thermal sensor
+ * interface.
+ */
+ if (adapter->ets)
+ e1000_set_i2c_bb(hw);
+ e1000_init_thermal_sensor_thresh(hw);
+ }
+
+ /*Re-establish EEE setting */
+ if (hw->phy.media_type == e1000_media_type_copper) {
+ switch (mac->type) {
+ case e1000_i350:
+ case e1000_i210:
+ case e1000_i211:
+ e1000_set_eee_i350(hw);
+ break;
+ case e1000_i354:
+ e1000_set_eee_i354(hw);
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (!netif_running(adapter->netdev))
+ igb_power_down_link(adapter);
+
+ igb_update_mng_vlan(adapter);
+
+ /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
+ E1000_WRITE_REG(hw, E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
+
+
+#ifdef HAVE_PTP_1588_CLOCK
+ /* Re-enable PTP, where applicable. */
+ igb_ptp_reset(adapter);
+#endif /* HAVE_PTP_1588_CLOCK */
+
+ e1000_get_phy_info(hw);
+
+ adapter->devrc++;
+}
+
+#ifdef HAVE_NDO_SET_FEATURES
+static kni_netdev_features_t igb_fix_features(struct net_device *netdev,
+ kni_netdev_features_t features)
+{
+ /*
+ * Since there is no support for separate tx vlan accel
+ * enabled make sure tx flag is cleared if rx is.
+ */
+#ifdef NETIF_F_HW_VLAN_CTAG_RX
+ if (!(features & NETIF_F_HW_VLAN_CTAG_RX))
+ features &= ~NETIF_F_HW_VLAN_CTAG_TX;
+#else
+ if (!(features & NETIF_F_HW_VLAN_RX))
+ features &= ~NETIF_F_HW_VLAN_TX;
+#endif
+
+ /* If Rx checksum is disabled, then LRO should also be disabled */
+ if (!(features & NETIF_F_RXCSUM))
+ features &= ~NETIF_F_LRO;
+
+ return features;
+}
+
+static int igb_set_features(struct net_device *netdev,
+ kni_netdev_features_t features)
+{
+ u32 changed = netdev->features ^ features;
+
+#ifdef NETIF_F_HW_VLAN_CTAG_RX
+ if (changed & NETIF_F_HW_VLAN_CTAG_RX)
+#else
+ if (changed & NETIF_F_HW_VLAN_RX)
+#endif
+ igb_vlan_mode(netdev, features);
+
+ return 0;
+}
+
+#ifdef NTF_SELF
+#ifdef USE_CONST_DEV_UC_CHAR
+static int igb_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
+ struct net_device *dev,
+ const unsigned char *addr,
+#ifdef HAVE_NDO_FDB_ADD_VID
+ u16 vid,
+#endif
+ u16 flags)
+#else
+static int igb_ndo_fdb_add(struct ndmsg *ndm,
+ struct net_device *dev,
+ unsigned char *addr,
+ u16 flags)
+#endif
+{
+ struct igb_adapter *adapter = netdev_priv(dev);
+ struct e1000_hw *hw = &adapter->hw;
+ int err;
+
+ if (!(adapter->vfs_allocated_count))
+ return -EOPNOTSUPP;
+
+ /* Hardware does not support aging addresses so if a
+ * ndm_state is given only allow permanent addresses
+ */
+ if (ndm->ndm_state && !(ndm->ndm_state & NUD_PERMANENT)) {
+ pr_info("%s: FDB only supports static addresses\n",
+ igb_driver_name);
+ return -EINVAL;
+ }
+
+ if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) {
+ u32 rar_uc_entries = hw->mac.rar_entry_count -
+ (adapter->vfs_allocated_count + 1);
+
+ if (netdev_uc_count(dev) < rar_uc_entries)
+ err = dev_uc_add_excl(dev, addr);
+ else
+ err = -ENOMEM;
+ } else if (is_multicast_ether_addr(addr)) {
+ err = dev_mc_add_excl(dev, addr);
+ } else {
+ err = -EINVAL;
+ }
+
+ /* Only return duplicate errors if NLM_F_EXCL is set */
+ if (err == -EEXIST && !(flags & NLM_F_EXCL))
+ err = 0;
+
+ return err;
+}
+
+#ifndef USE_DEFAULT_FDB_DEL_DUMP
+#ifdef USE_CONST_DEV_UC_CHAR
+static int igb_ndo_fdb_del(struct ndmsg *ndm,
+ struct net_device *dev,
+ const unsigned char *addr)
+#else
+static int igb_ndo_fdb_del(struct ndmsg *ndm,
+ struct net_device *dev,
+ unsigned char *addr)
+#endif
+{
+ struct igb_adapter *adapter = netdev_priv(dev);
+ int err = -EOPNOTSUPP;
+
+ if (ndm->ndm_state & NUD_PERMANENT) {
+ pr_info("%s: FDB only supports static addresses\n",
+ igb_driver_name);
+ return -EINVAL;
+ }
+
+ if (adapter->vfs_allocated_count) {
+ if (is_unicast_ether_addr(addr))
+ err = dev_uc_del(dev, addr);
+ else if (is_multicast_ether_addr(addr))
+ err = dev_mc_del(dev, addr);
+ else
+ err = -EINVAL;
+ }
+
+ return err;
+}
+
+static int igb_ndo_fdb_dump(struct sk_buff *skb,
+ struct netlink_callback *cb,
+ struct net_device *dev,
+ int idx)
+{
+ struct igb_adapter *adapter = netdev_priv(dev);
+
+ if (adapter->vfs_allocated_count)
+ idx = ndo_dflt_fdb_dump(skb, cb, dev, idx);
+
+ return idx;
+}
+#endif /* USE_DEFAULT_FDB_DEL_DUMP */
+
+#ifdef HAVE_BRIDGE_ATTRIBS
+#ifdef HAVE_NDO_BRIDGE_SET_DEL_LINK_FLAGS
+static int igb_ndo_bridge_setlink(struct net_device *dev,
+ struct nlmsghdr *nlh,
+ u16 flags)
+#else
+static int igb_ndo_bridge_setlink(struct net_device *dev,
+ struct nlmsghdr *nlh)
+#endif /* HAVE_NDO_BRIDGE_SET_DEL_LINK_FLAGS */
+{
+ struct igb_adapter *adapter = netdev_priv(dev);
+ struct e1000_hw *hw = &adapter->hw;
+ struct nlattr *attr, *br_spec;
+ int rem;
+
+ if (!(adapter->vfs_allocated_count))
+ return -EOPNOTSUPP;
+
+ switch (adapter->hw.mac.type) {
+ case e1000_82576:
+ case e1000_i350:
+ case e1000_i354:
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
+
+ nla_for_each_nested(attr, br_spec, rem) {
+ __u16 mode;
+
+ if (nla_type(attr) != IFLA_BRIDGE_MODE)
+ continue;
+
+ mode = nla_get_u16(attr);
+ if (mode == BRIDGE_MODE_VEPA) {
+ e1000_vmdq_set_loopback_pf(hw, 0);
+ adapter->flags &= ~IGB_FLAG_LOOPBACK_ENABLE;
+ } else if (mode == BRIDGE_MODE_VEB) {
+ e1000_vmdq_set_loopback_pf(hw, 1);
+ adapter->flags |= IGB_FLAG_LOOPBACK_ENABLE;
+ } else
+ return -EINVAL;
+
+ netdev_info(adapter->netdev, "enabling bridge mode: %s\n",
+ mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
+ }
+
+ return 0;
+}
+
+#ifdef HAVE_BRIDGE_FILTER
+#ifdef HAVE_NDO_BRIDGE_GETLINK_NLFLAGS
+static int igb_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
+ struct net_device *dev, u32 filter_mask,
+ int nlflags)
+#else
+static int igb_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
+ struct net_device *dev, u32 filter_mask)
+#endif /* HAVE_NDO_BRIDGE_GETLINK_NLFLAGS */
+#else
+static int igb_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
+ struct net_device *dev)
+#endif
+{
+ struct igb_adapter *adapter = netdev_priv(dev);
+ u16 mode;
+
+ if (!(adapter->vfs_allocated_count))
+ return -EOPNOTSUPP;
+
+ if (adapter->flags & IGB_FLAG_LOOPBACK_ENABLE)
+ mode = BRIDGE_MODE_VEB;
+ else
+ mode = BRIDGE_MODE_VEPA;
+
+#ifdef HAVE_NDO_DFLT_BRIDGE_ADD_MASK
+#ifdef HAVE_NDO_BRIDGE_GETLINK_NLFLAGS
+#ifdef HAVE_NDO_BRIDGE_GETLINK_FILTER_MASK_VLAN_FILL
+ return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode, 0, 0,
+ nlflags, filter_mask, NULL);
+#else
+ return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode, 0, 0, nlflags);
+#endif /* HAVE_NDO_BRIDGE_GETLINK_FILTER_MASK_VLAN_FILL */
+#else
+ return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode, 0, 0);
+#endif /* HAVE_NDO_BRIDGE_GETLINK_NLFLAGS */
+#else
+ return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode);
+#endif /* HAVE_NDO_DFLT_BRIDGE_ADD_MASK */
+}
+#endif /* HAVE_BRIDGE_ATTRIBS */
+#endif /* NTF_SELF */
+
+#endif /* HAVE_NDO_SET_FEATURES */
+#ifdef HAVE_NET_DEVICE_OPS
+static const struct net_device_ops igb_netdev_ops = {
+ .ndo_open = igb_open,
+ .ndo_stop = igb_close,
+ .ndo_start_xmit = igb_xmit_frame,
+ .ndo_get_stats = igb_get_stats,
+ .ndo_set_rx_mode = igb_set_rx_mode,
+ .ndo_set_mac_address = igb_set_mac,
+ .ndo_change_mtu = igb_change_mtu,
+ .ndo_do_ioctl = igb_ioctl,
+ .ndo_tx_timeout = igb_tx_timeout,
+ .ndo_validate_addr = eth_validate_addr,
+ .ndo_vlan_rx_add_vid = igb_vlan_rx_add_vid,
+ .ndo_vlan_rx_kill_vid = igb_vlan_rx_kill_vid,
+#ifdef IFLA_VF_MAX
+ .ndo_set_vf_mac = igb_ndo_set_vf_mac,
+ .ndo_set_vf_vlan = igb_ndo_set_vf_vlan,
+#ifdef HAVE_VF_MIN_MAX_TXRATE
+ .ndo_set_vf_rate = igb_ndo_set_vf_bw,
+#else /* HAVE_VF_MIN_MAX_TXRATE */
+ .ndo_set_vf_tx_rate = igb_ndo_set_vf_bw,
+#endif /* HAVE_VF_MIN_MAX_TXRATE */
+ .ndo_get_vf_config = igb_ndo_get_vf_config,
+#ifdef HAVE_VF_SPOOFCHK_CONFIGURE
+ .ndo_set_vf_spoofchk = igb_ndo_set_vf_spoofchk,
+#endif /* HAVE_VF_SPOOFCHK_CONFIGURE */
+#endif /* IFLA_VF_MAX */
+#ifdef CONFIG_NET_POLL_CONTROLLER
+ .ndo_poll_controller = igb_netpoll,
+#endif
+#ifdef HAVE_NDO_SET_FEATURES
+ .ndo_fix_features = igb_fix_features,
+ .ndo_set_features = igb_set_features,
+#endif
+#ifdef HAVE_VLAN_RX_REGISTER
+ .ndo_vlan_rx_register = igb_vlan_mode,
+#endif
+#ifndef HAVE_RHEL6_NETDEV_OPS_EXT_FDB
+#ifdef NTF_SELF
+ .ndo_fdb_add = igb_ndo_fdb_add,
+#ifndef USE_DEFAULT_FDB_DEL_DUMP
+ .ndo_fdb_del = igb_ndo_fdb_del,
+ .ndo_fdb_dump = igb_ndo_fdb_dump,
+#endif
+#endif /* ! HAVE_RHEL6_NETDEV_OPS_EXT_FDB */
+#ifdef HAVE_BRIDGE_ATTRIBS
+ .ndo_bridge_setlink = igb_ndo_bridge_setlink,
+ .ndo_bridge_getlink = igb_ndo_bridge_getlink,
+#endif /* HAVE_BRIDGE_ATTRIBS */
+#endif
+};
+
+#ifdef CONFIG_IGB_VMDQ_NETDEV
+static const struct net_device_ops igb_vmdq_ops = {
+ .ndo_open = &igb_vmdq_open,
+ .ndo_stop = &igb_vmdq_close,
+ .ndo_start_xmit = &igb_vmdq_xmit_frame,
+ .ndo_get_stats = &igb_vmdq_get_stats,
+ .ndo_set_rx_mode = &igb_vmdq_set_rx_mode,
+ .ndo_validate_addr = eth_validate_addr,
+ .ndo_set_mac_address = &igb_vmdq_set_mac,
+ .ndo_change_mtu = &igb_vmdq_change_mtu,
+ .ndo_tx_timeout = &igb_vmdq_tx_timeout,
+ .ndo_vlan_rx_register = &igb_vmdq_vlan_rx_register,
+ .ndo_vlan_rx_add_vid = &igb_vmdq_vlan_rx_add_vid,
+ .ndo_vlan_rx_kill_vid = &igb_vmdq_vlan_rx_kill_vid,
+};
+
+#endif /* CONFIG_IGB_VMDQ_NETDEV */
+#endif /* HAVE_NET_DEVICE_OPS */
+#ifdef CONFIG_IGB_VMDQ_NETDEV
+void igb_assign_vmdq_netdev_ops(struct net_device *vnetdev)
+{
+#ifdef HAVE_NET_DEVICE_OPS
+ vnetdev->netdev_ops = &igb_vmdq_ops;
+#else
+ dev->open = &igb_vmdq_open;
+ dev->stop = &igb_vmdq_close;
+ dev->hard_start_xmit = &igb_vmdq_xmit_frame;
+ dev->get_stats = &igb_vmdq_get_stats;
+#ifdef HAVE_SET_RX_MODE
+ dev->set_rx_mode = &igb_vmdq_set_rx_mode;
+#endif
+ dev->set_multicast_list = &igb_vmdq_set_rx_mode;
+ dev->set_mac_address = &igb_vmdq_set_mac;
+ dev->change_mtu = &igb_vmdq_change_mtu;
+#ifdef HAVE_TX_TIMEOUT
+ dev->tx_timeout = &igb_vmdq_tx_timeout;
+#endif
+#if defined(NETIF_F_HW_VLAN_TX) || defined(NETIF_F_HW_VLAN_CTAG_TX)
+ dev->vlan_rx_register = &igb_vmdq_vlan_rx_register;
+ dev->vlan_rx_add_vid = &igb_vmdq_vlan_rx_add_vid;
+ dev->vlan_rx_kill_vid = &igb_vmdq_vlan_rx_kill_vid;
+#endif
+#endif
+ igb_vmdq_set_ethtool_ops(vnetdev);
+ vnetdev->watchdog_timeo = 5 * HZ;
+
+}
+
+int igb_init_vmdq_netdevs(struct igb_adapter *adapter)
+{
+ int pool, err = 0, base_queue;
+ struct net_device *vnetdev;
+ struct igb_vmdq_adapter *vmdq_adapter;
+
+ for (pool = 1; pool < adapter->vmdq_pools; pool++) {
+ int qpp = (!adapter->rss_queues ? 1 : adapter->rss_queues);
+ base_queue = pool * qpp;
+ vnetdev = alloc_etherdev(sizeof(struct igb_vmdq_adapter));
+ if (!vnetdev) {
+ err = -ENOMEM;
+ break;
+ }
+ vmdq_adapter = netdev_priv(vnetdev);
+ vmdq_adapter->vnetdev = vnetdev;
+ vmdq_adapter->real_adapter = adapter;
+ vmdq_adapter->rx_ring = adapter->rx_ring[base_queue];
+ vmdq_adapter->tx_ring = adapter->tx_ring[base_queue];
+ igb_assign_vmdq_netdev_ops(vnetdev);
+ snprintf(vnetdev->name, IFNAMSIZ, "%sv%d",
+ adapter->netdev->name, pool);
+ vnetdev->features = adapter->netdev->features;
+#ifdef HAVE_NETDEV_VLAN_FEATURES
+ vnetdev->vlan_features = adapter->netdev->vlan_features;
+#endif
+ adapter->vmdq_netdev[pool-1] = vnetdev;
+ err = register_netdev(vnetdev);
+ if (err)
+ break;
+ }
+ return err;
+}
+
+int igb_remove_vmdq_netdevs(struct igb_adapter *adapter)
+{
+ int pool, err = 0;
+
+ for (pool = 1; pool < adapter->vmdq_pools; pool++) {
+ unregister_netdev(adapter->vmdq_netdev[pool-1]);
+ free_netdev(adapter->vmdq_netdev[pool-1]);
+ adapter->vmdq_netdev[pool-1] = NULL;
+ }
+ return err;
+}
+#endif /* CONFIG_IGB_VMDQ_NETDEV */
+
+/**
+ * igb_set_fw_version - Configure version string for ethtool
+ * @adapter: adapter struct
+ *
+ **/
+static void igb_set_fw_version(struct igb_adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ struct e1000_fw_version fw;
+
+ e1000_get_fw_version(hw, &fw);
+
+ switch (hw->mac.type) {
+ case e1000_i210:
+ case e1000_i211:
+ if (!(e1000_get_flash_presence_i210(hw))) {
+ snprintf(adapter->fw_version,
+ sizeof(adapter->fw_version),
+ "%2d.%2d-%d",
+ fw.invm_major, fw.invm_minor, fw.invm_img_type);
+ break;
+ }
+ /* fall through */
+ default:
+ /* if option rom is valid, display its version too*/
+ if (fw.or_valid) {
+ snprintf(adapter->fw_version,
+ sizeof(adapter->fw_version),
+ "%d.%d, 0x%08x, %d.%d.%d",
+ fw.eep_major, fw.eep_minor, fw.etrack_id,
+ fw.or_major, fw.or_build, fw.or_patch);
+ /* no option rom */
+ } else {
+ if (fw.etrack_id != 0X0000) {
+ snprintf(adapter->fw_version,
+ sizeof(adapter->fw_version),
+ "%d.%d, 0x%08x",
+ fw.eep_major, fw.eep_minor, fw.etrack_id);
+ } else {
+ snprintf(adapter->fw_version,
+ sizeof(adapter->fw_version),
+ "%d.%d.%d",
+ fw.eep_major, fw.eep_minor, fw.eep_build);
+ }
+ }
+ break;
+ }
+
+ return;
+}
+
+/**
+ * igb_init_mas - init Media Autosense feature if enabled in the NVM
+ *
+ * @adapter: adapter struct
+ **/
+static void igb_init_mas(struct igb_adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ u16 eeprom_data;
+
+ e1000_read_nvm(hw, NVM_COMPAT, 1, &eeprom_data);
+ switch (hw->bus.func) {
+ case E1000_FUNC_0:
+ if (eeprom_data & IGB_MAS_ENABLE_0)
+ adapter->flags |= IGB_FLAG_MAS_ENABLE;
+ break;
+ case E1000_FUNC_1:
+ if (eeprom_data & IGB_MAS_ENABLE_1)
+ adapter->flags |= IGB_FLAG_MAS_ENABLE;
+ break;
+ case E1000_FUNC_2:
+ if (eeprom_data & IGB_MAS_ENABLE_2)
+ adapter->flags |= IGB_FLAG_MAS_ENABLE;
+ break;
+ case E1000_FUNC_3:
+ if (eeprom_data & IGB_MAS_ENABLE_3)
+ adapter->flags |= IGB_FLAG_MAS_ENABLE;
+ break;
+ default:
+ /* Shouldn't get here */
+ dev_err(pci_dev_to_dev(adapter->pdev),
+ "%s:AMS: Invalid port configuration, returning\n",
+ adapter->netdev->name);
+ break;
+ }
+}
+
+/**
+ * igb_probe - Device Initialization Routine
+ * @pdev: PCI device information struct
+ * @ent: entry in igb_pci_tbl
+ *
+ * Returns 0 on success, negative on failure
+ *
+ * igb_probe initializes an adapter identified by a pci_dev structure.
+ * The OS initialization, configuring of the adapter private structure,
+ * and a hardware reset occur.
+ **/
+static int __devinit igb_probe(struct pci_dev *pdev,
+ const struct pci_device_id *ent)
+{
+ struct net_device *netdev;
+ struct igb_adapter *adapter;
+ struct e1000_hw *hw;
+ u16 eeprom_data = 0;
+ u8 pba_str[E1000_PBANUM_LENGTH];
+ s32 ret_val;
+ static int global_quad_port_a; /* global quad port a indication */
+ int i, err, pci_using_dac;
+ static int cards_found;
+
+ err = pci_enable_device_mem(pdev);
+ if (err)
+ return err;
+
+ pci_using_dac = 0;
+ err = dma_set_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(64));
+ if (!err) {
+ err = dma_set_coherent_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(64));
+ if (!err)
+ pci_using_dac = 1;
+ } else {
+ err = dma_set_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(32));
+ if (err) {
+ err = dma_set_coherent_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(32));
+ if (err) {
+ IGB_ERR("No usable DMA configuration, "
+ "aborting\n");
+ goto err_dma;
+ }
+ }
+ }
+
+#ifndef HAVE_ASPM_QUIRKS
+ /* 82575 requires that the pci-e link partner disable the L0s state */
+ switch (pdev->device) {
+ case E1000_DEV_ID_82575EB_COPPER:
+ case E1000_DEV_ID_82575EB_FIBER_SERDES:
+ case E1000_DEV_ID_82575GB_QUAD_COPPER:
+ pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S);
+ default:
+ break;
+ }
+
+#endif /* HAVE_ASPM_QUIRKS */
+ err = pci_request_selected_regions(pdev,
+ pci_select_bars(pdev,
+ IORESOURCE_MEM),
+ igb_driver_name);
+ if (err)
+ goto err_pci_reg;
+
+ pci_enable_pcie_error_reporting(pdev);
+
+ pci_set_master(pdev);
+
+ err = -ENOMEM;
+#ifdef HAVE_TX_MQ
+ netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
+ IGB_MAX_TX_QUEUES);
+#else
+ netdev = alloc_etherdev(sizeof(struct igb_adapter));
+#endif /* HAVE_TX_MQ */
+ if (!netdev)
+ goto err_alloc_etherdev;
+
+ SET_MODULE_OWNER(netdev);
+ SET_NETDEV_DEV(netdev, &pdev->dev);
+
+ pci_set_drvdata(pdev, netdev);
+ adapter = netdev_priv(netdev);
+ adapter->netdev = netdev;
+ adapter->pdev = pdev;
+ hw = &adapter->hw;
+ hw->back = adapter;
+ adapter->port_num = hw->bus.func;
+ adapter->msg_enable = (1 << debug) - 1;
+
+#ifdef HAVE_PCI_ERS
+ err = pci_save_state(pdev);
+ if (err)
+ goto err_ioremap;
+#endif
+ err = -EIO;
+ hw->hw_addr = ioremap(pci_resource_start(pdev, 0),
+ pci_resource_len(pdev, 0));
+ if (!hw->hw_addr)
+ goto err_ioremap;
+
+#ifdef HAVE_NET_DEVICE_OPS
+ netdev->netdev_ops = &igb_netdev_ops;
+#else /* HAVE_NET_DEVICE_OPS */
+ netdev->open = &igb_open;
+ netdev->stop = &igb_close;
+ netdev->get_stats = &igb_get_stats;
+#ifdef HAVE_SET_RX_MODE
+ netdev->set_rx_mode = &igb_set_rx_mode;
+#endif
+ netdev->set_multicast_list = &igb_set_rx_mode;
+ netdev->set_mac_address = &igb_set_mac;
+ netdev->change_mtu = &igb_change_mtu;
+ netdev->do_ioctl = &igb_ioctl;
+#ifdef HAVE_TX_TIMEOUT
+ netdev->tx_timeout = &igb_tx_timeout;
+#endif
+ netdev->vlan_rx_register = igb_vlan_mode;
+ netdev->vlan_rx_add_vid = igb_vlan_rx_add_vid;
+ netdev->vlan_rx_kill_vid = igb_vlan_rx_kill_vid;
+#ifdef CONFIG_NET_POLL_CONTROLLER
+ netdev->poll_controller = igb_netpoll;
+#endif
+ netdev->hard_start_xmit = &igb_xmit_frame;
+#endif /* HAVE_NET_DEVICE_OPS */
+ igb_set_ethtool_ops(netdev);
+#ifdef HAVE_TX_TIMEOUT
+ netdev->watchdog_timeo = 5 * HZ;
+#endif
+
+ strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
+
+ adapter->bd_number = cards_found;
+
+ /* setup the private structure */
+ err = igb_sw_init(adapter);
+ if (err)
+ goto err_sw_init;
+
+ e1000_get_bus_info(hw);
+
+ hw->phy.autoneg_wait_to_complete = FALSE;
+ hw->mac.adaptive_ifs = FALSE;
+
+ /* Copper options */
+ if (hw->phy.media_type == e1000_media_type_copper) {
+ hw->phy.mdix = AUTO_ALL_MODES;
+ hw->phy.disable_polarity_correction = FALSE;
+ hw->phy.ms_type = e1000_ms_hw_default;
+ }
+
+ if (e1000_check_reset_block(hw))
+ dev_info(pci_dev_to_dev(pdev),
+ "PHY reset is blocked due to SOL/IDER session.\n");
+
+ /*
+ * features is initialized to 0 in allocation, it might have bits
+ * set by igb_sw_init so we should use an or instead of an
+ * assignment.
+ */
+ netdev->features |= NETIF_F_SG |
+ NETIF_F_IP_CSUM |
+#ifdef NETIF_F_IPV6_CSUM
+ NETIF_F_IPV6_CSUM |
+#endif
+#ifdef NETIF_F_TSO
+ NETIF_F_TSO |
+#ifdef NETIF_F_TSO6
+ NETIF_F_TSO6 |
+#endif
+#endif /* NETIF_F_TSO */
+#ifdef NETIF_F_RXHASH
+ NETIF_F_RXHASH |
+#endif
+ NETIF_F_RXCSUM |
+#ifdef NETIF_F_HW_VLAN_CTAG_RX
+ NETIF_F_HW_VLAN_CTAG_RX |
+ NETIF_F_HW_VLAN_CTAG_TX;
+#else
+ NETIF_F_HW_VLAN_RX |
+ NETIF_F_HW_VLAN_TX;
+#endif
+
+ if (hw->mac.type >= e1000_82576)
+ netdev->features |= NETIF_F_SCTP_CSUM;
+
+#ifdef HAVE_NDO_SET_FEATURES
+ /* copy netdev features into list of user selectable features */
+ netdev->hw_features |= netdev->features;
+#ifndef IGB_NO_LRO
+
+ /* give us the option of enabling LRO later */
+ netdev->hw_features |= NETIF_F_LRO;
+#endif
+#else
+#ifdef NETIF_F_GRO
+
+ /* this is only needed on kernels prior to 2.6.39 */
+ netdev->features |= NETIF_F_GRO;
+#endif
+#endif
+
+ /* set this bit last since it cannot be part of hw_features */
+#ifdef NETIF_F_HW_VLAN_CTAG_FILTER
+ netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+#else
+ netdev->features |= NETIF_F_HW_VLAN_FILTER;
+#endif
+
+#ifdef HAVE_NETDEV_VLAN_FEATURES
+ netdev->vlan_features |= NETIF_F_TSO |
+ NETIF_F_TSO6 |
+ NETIF_F_IP_CSUM |
+ NETIF_F_IPV6_CSUM |
+ NETIF_F_SG;
+
+#endif
+ if (pci_using_dac)
+ netdev->features |= NETIF_F_HIGHDMA;
+
+ adapter->en_mng_pt = e1000_enable_mng_pass_thru(hw);
+#ifdef DEBUG
+ if (adapter->dmac != IGB_DMAC_DISABLE)
+ printk("%s: DMA Coalescing is enabled..\n", netdev->name);
+#endif
+
+ /* before reading the NVM, reset the controller to put the device in a
+ * known good starting state */
+ e1000_reset_hw(hw);
+
+ /* make sure the NVM is good */
+ if (e1000_validate_nvm_checksum(hw) < 0) {
+ dev_err(pci_dev_to_dev(pdev), "The NVM Checksum Is Not"
+ " Valid\n");
+ err = -EIO;
+ goto err_eeprom;
+ }
+
+ /* copy the MAC address out of the NVM */
+ if (e1000_read_mac_addr(hw))
+ dev_err(pci_dev_to_dev(pdev), "NVM Read Error\n");
+ memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
+#ifdef ETHTOOL_GPERMADDR
+ memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
+
+ if (!is_valid_ether_addr(netdev->perm_addr)) {
+#else
+ if (!is_valid_ether_addr(netdev->dev_addr)) {
+#endif
+ dev_err(pci_dev_to_dev(pdev), "Invalid MAC Address\n");
+ err = -EIO;
+ goto err_eeprom;
+ }
+
+ memcpy(&adapter->mac_table[0].addr, hw->mac.addr, netdev->addr_len);
+ adapter->mac_table[0].queue = adapter->vfs_allocated_count;
+ adapter->mac_table[0].state = (IGB_MAC_STATE_DEFAULT | IGB_MAC_STATE_IN_USE);
+ igb_rar_set(adapter, 0);
+
+ /* get firmware version for ethtool -i */
+ igb_set_fw_version(adapter);
+
+ /* Check if Media Autosense is enabled */
+ if (hw->mac.type == e1000_82580)
+ igb_init_mas(adapter);
+ setup_timer(&adapter->watchdog_timer, &igb_watchdog,
+ (unsigned long) adapter);
+ if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA)
+ setup_timer(&adapter->dma_err_timer, &igb_dma_err_timer,
+ (unsigned long) adapter);
+ setup_timer(&adapter->phy_info_timer, &igb_update_phy_info,
+ (unsigned long) adapter);
+
+ INIT_WORK(&adapter->reset_task, igb_reset_task);
+ INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
+ if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA)
+ INIT_WORK(&adapter->dma_err_task, igb_dma_err_task);
+
+ /* Initialize link properties that are user-changeable */
+ adapter->fc_autoneg = true;
+ hw->mac.autoneg = true;
+ hw->phy.autoneg_advertised = 0x2f;
+
+ hw->fc.requested_mode = e1000_fc_default;
+ hw->fc.current_mode = e1000_fc_default;
+
+ e1000_validate_mdi_setting(hw);
+
+ /* By default, support wake on port A */
+ if (hw->bus.func == 0)
+ adapter->flags |= IGB_FLAG_WOL_SUPPORTED;
+
+ /* Check the NVM for wake support for non-port A ports */
+ if (hw->mac.type >= e1000_82580)
+ hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
+ NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
+ &eeprom_data);
+ else if (hw->bus.func == 1)
+ e1000_read_nvm(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
+
+ if (eeprom_data & IGB_EEPROM_APME)
+ adapter->flags |= IGB_FLAG_WOL_SUPPORTED;
+
+ /* now that we have the eeprom settings, apply the special cases where
+ * the eeprom may be wrong or the board simply won't support wake on
+ * lan on a particular port */
+ switch (pdev->device) {
+ case E1000_DEV_ID_82575GB_QUAD_COPPER:
+ adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED;
+ break;
+ case E1000_DEV_ID_82575EB_FIBER_SERDES:
+ case E1000_DEV_ID_82576_FIBER:
+ case E1000_DEV_ID_82576_SERDES:
+ /* Wake events only supported on port A for dual fiber
+ * regardless of eeprom setting */
+ if (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_FUNC_1)
+ adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED;
+ break;
+ case E1000_DEV_ID_82576_QUAD_COPPER:
+ case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
+ /* if quad port adapter, disable WoL on all but port A */
+ if (global_quad_port_a != 0)
+ adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED;
+ else
+ adapter->flags |= IGB_FLAG_QUAD_PORT_A;
+ /* Reset for multiple quad port adapters */
+ if (++global_quad_port_a == 4)
+ global_quad_port_a = 0;
+ break;
+ default:
+ /* If the device can't wake, don't set software support */
+ if (!device_can_wakeup(&adapter->pdev->dev))
+ adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED;
+ break;
+ }
+
+ /* initialize the wol settings based on the eeprom settings */
+ if (adapter->flags & IGB_FLAG_WOL_SUPPORTED)
+ adapter->wol |= E1000_WUFC_MAG;
+
+ /* Some vendors want WoL disabled by default, but still supported */
+ if ((hw->mac.type == e1000_i350) &&
+ (pdev->subsystem_vendor == PCI_VENDOR_ID_HP)) {
+ adapter->flags |= IGB_FLAG_WOL_SUPPORTED;
+ adapter->wol = 0;
+ }
+
+ device_set_wakeup_enable(pci_dev_to_dev(adapter->pdev),
+ adapter->flags & IGB_FLAG_WOL_SUPPORTED);
+
+ /* reset the hardware with the new settings */
+ igb_reset(adapter);
+ adapter->devrc = 0;
+
+#ifdef HAVE_I2C_SUPPORT
+ /* Init the I2C interface */
+ err = igb_init_i2c(adapter);
+ if (err) {
+ dev_err(&pdev->dev, "failed to init i2c interface\n");
+ goto err_eeprom;
+ }
+#endif /* HAVE_I2C_SUPPORT */
+
+ /* let the f/w know that the h/w is now under the control of the
+ * driver. */
+ igb_get_hw_control(adapter);
+
+ strncpy(netdev->name, "eth%d", IFNAMSIZ);
+ err = register_netdev(netdev);
+ if (err)
+ goto err_register;
+
+#ifdef CONFIG_IGB_VMDQ_NETDEV
+ err = igb_init_vmdq_netdevs(adapter);
+ if (err)
+ goto err_register;
+#endif
+ /* carrier off reporting is important to ethtool even BEFORE open */
+ netif_carrier_off(netdev);
+
+#ifdef IGB_DCA
+ if (dca_add_requester(&pdev->dev) == E1000_SUCCESS) {
+ adapter->flags |= IGB_FLAG_DCA_ENABLED;
+ dev_info(pci_dev_to_dev(pdev), "DCA enabled\n");
+ igb_setup_dca(adapter);
+ }
+
+#endif
+#ifdef HAVE_PTP_1588_CLOCK
+ /* do hw tstamp init after resetting */
+ igb_ptp_init(adapter);
+#endif /* HAVE_PTP_1588_CLOCK */
+
+ dev_info(pci_dev_to_dev(pdev), "Intel(R) Gigabit Ethernet Network Connection\n");
+ /* print bus type/speed/width info */
+ dev_info(pci_dev_to_dev(pdev), "%s: (PCIe:%s:%s) ",
+ netdev->name,
+ ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5GT/s" :
+ (hw->bus.speed == e1000_bus_speed_5000) ? "5.0GT/s" :
+ (hw->mac.type == e1000_i354) ? "integrated" :
+ "unknown"),
+ ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
+ (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
+ (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
+ (hw->mac.type == e1000_i354) ? "integrated" :
+ "unknown"));
+ dev_info(pci_dev_to_dev(pdev), "%s: MAC: ", netdev->name);
+ for (i = 0; i < 6; i++)
+ printk("%2.2x%c", netdev->dev_addr[i], i == 5 ? '\n' : ':');
+
+ ret_val = e1000_read_pba_string(hw, pba_str, E1000_PBANUM_LENGTH);
+ if (ret_val)
+ strncpy(pba_str, "Unknown", sizeof(pba_str) - 1);
+ dev_info(pci_dev_to_dev(pdev), "%s: PBA No: %s\n", netdev->name,
+ pba_str);
+
+
+ /* Initialize the thermal sensor on i350 devices. */
+ if (hw->mac.type == e1000_i350) {
+ if (hw->bus.func == 0) {
+ u16 ets_word;
+
+ /*
+ * Read the NVM to determine if this i350 device
+ * supports an external thermal sensor.
+ */
+ e1000_read_nvm(hw, NVM_ETS_CFG, 1, &ets_word);
+ if (ets_word != 0x0000 && ets_word != 0xFFFF)
+ adapter->ets = true;
+ else
+ adapter->ets = false;
+ }
+#ifdef IGB_HWMON
+
+ igb_sysfs_init(adapter);
+#else
+#ifdef IGB_PROCFS
+
+ igb_procfs_init(adapter);
+#endif /* IGB_PROCFS */
+#endif /* IGB_HWMON */
+ } else {
+ adapter->ets = false;
+ }
+
+ if (hw->phy.media_type == e1000_media_type_copper) {
+ switch (hw->mac.type) {
+ case e1000_i350:
+ case e1000_i210:
+ case e1000_i211:
+ /* Enable EEE for internal copper PHY devices */
+ err = e1000_set_eee_i350(hw);
+ if ((!err) &&
+ (adapter->flags & IGB_FLAG_EEE))
+ adapter->eee_advert =
+ MDIO_EEE_100TX | MDIO_EEE_1000T;
+ break;
+ case e1000_i354:
+ if ((E1000_READ_REG(hw, E1000_CTRL_EXT)) &
+ (E1000_CTRL_EXT_LINK_MODE_SGMII)) {
+ err = e1000_set_eee_i354(hw);
+ if ((!err) &&
+ (adapter->flags & IGB_FLAG_EEE))
+ adapter->eee_advert =
+ MDIO_EEE_100TX | MDIO_EEE_1000T;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ /* send driver version info to firmware */
+ if (hw->mac.type >= e1000_i350)
+ igb_init_fw(adapter);
+
+#ifndef IGB_NO_LRO
+ if (netdev->features & NETIF_F_LRO)
+ dev_info(pci_dev_to_dev(pdev), "Internal LRO is enabled \n");
+ else
+ dev_info(pci_dev_to_dev(pdev), "LRO is disabled \n");
+#endif
+ dev_info(pci_dev_to_dev(pdev),
+ "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
+ adapter->msix_entries ? "MSI-X" :
+ (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
+ adapter->num_rx_queues, adapter->num_tx_queues);
+
+ cards_found++;
+
+ pm_runtime_put_noidle(&pdev->dev);
+ return 0;
+
+err_register:
+ igb_release_hw_control(adapter);
+#ifdef HAVE_I2C_SUPPORT
+ memset(&adapter->i2c_adap, 0, sizeof(adapter->i2c_adap));
+#endif /* HAVE_I2C_SUPPORT */
+err_eeprom:
+ if (!e1000_check_reset_block(hw))
+ e1000_phy_hw_reset(hw);
+
+ if (hw->flash_address)
+ iounmap(hw->flash_address);
+err_sw_init:
+ igb_clear_interrupt_scheme(adapter);
+ igb_reset_sriov_capability(adapter);
+ iounmap(hw->hw_addr);
+err_ioremap:
+ free_netdev(netdev);
+err_alloc_etherdev:
+ pci_release_selected_regions(pdev,
+ pci_select_bars(pdev, IORESOURCE_MEM));
+err_pci_reg:
+err_dma:
+ pci_disable_device(pdev);
+ return err;
+}
+#ifdef HAVE_I2C_SUPPORT
+/*
+ * igb_remove_i2c - Cleanup I2C interface
+ * @adapter: pointer to adapter structure
+ *
+ */
+static void igb_remove_i2c(struct igb_adapter *adapter)
+{
+
+ /* free the adapter bus structure */
+ i2c_del_adapter(&adapter->i2c_adap);
+}
+#endif /* HAVE_I2C_SUPPORT */
+
+/**
+ * igb_remove - Device Removal Routine
+ * @pdev: PCI device information struct
+ *
+ * igb_remove is called by the PCI subsystem to alert the driver
+ * that it should release a PCI device. The could be caused by a
+ * Hot-Plug event, or because the driver is going to be removed from
+ * memory.
+ **/
+static void __devexit igb_remove(struct pci_dev *pdev)
+{
+ struct net_device *netdev = pci_get_drvdata(pdev);
+ struct igb_adapter *adapter = netdev_priv(netdev);
+ struct e1000_hw *hw = &adapter->hw;
+
+ pm_runtime_get_noresume(&pdev->dev);
+#ifdef HAVE_I2C_SUPPORT
+ igb_remove_i2c(adapter);
+#endif /* HAVE_I2C_SUPPORT */
+#ifdef HAVE_PTP_1588_CLOCK
+ igb_ptp_stop(adapter);
+#endif /* HAVE_PTP_1588_CLOCK */
+
+ /* flush_scheduled work may reschedule our watchdog task, so
+ * explicitly disable watchdog tasks from being rescheduled */
+ set_bit(__IGB_DOWN, &adapter->state);
+ del_timer_sync(&adapter->watchdog_timer);
+ if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA)
+ del_timer_sync(&adapter->dma_err_timer);
+ del_timer_sync(&adapter->phy_info_timer);
+
+ flush_scheduled_work();
+
+#ifdef IGB_DCA
+ if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
+ dev_info(pci_dev_to_dev(pdev), "DCA disabled\n");
+ dca_remove_requester(&pdev->dev);
+ adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
+ E1000_WRITE_REG(hw, E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_DISABLE);
+ }
+#endif
+
+ /* Release control of h/w to f/w. If f/w is AMT enabled, this
+ * would have already happened in close and is redundant. */
+ igb_release_hw_control(adapter);
+
+ unregister_netdev(netdev);
+#ifdef CONFIG_IGB_VMDQ_NETDEV
+ igb_remove_vmdq_netdevs(adapter);
+#endif
+
+ igb_clear_interrupt_scheme(adapter);
+ igb_reset_sriov_capability(adapter);
+
+ iounmap(hw->hw_addr);
+ if (hw->flash_address)
+ iounmap(hw->flash_address);
+ pci_release_selected_regions(pdev,
+ pci_select_bars(pdev, IORESOURCE_MEM));
+
+#ifdef IGB_HWMON
+ igb_sysfs_exit(adapter);
+#else
+#ifdef IGB_PROCFS
+ igb_procfs_exit(adapter);
+#endif /* IGB_PROCFS */
+#endif /* IGB_HWMON */
+ kfree(adapter->mac_table);
+ kfree(adapter->shadow_vfta);
+ free_netdev(netdev);
+
+ pci_disable_pcie_error_reporting(pdev);
+
+ pci_disable_device(pdev);
+}
+
+/**
+ * igb_sw_init - Initialize general software structures (struct igb_adapter)
+ * @adapter: board private structure to initialize
+ *
+ * igb_sw_init initializes the Adapter private data structure.
+ * Fields are initialized based on PCI device information and
+ * OS network device settings (MTU size).
+ **/
+static int igb_sw_init(struct igb_adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ struct net_device *netdev = adapter->netdev;
+ struct pci_dev *pdev = adapter->pdev;
+
+ /* PCI config space info */
+
+ hw->vendor_id = pdev->vendor;
+ hw->device_id = pdev->device;
+ hw->subsystem_vendor_id = pdev->subsystem_vendor;
+ hw->subsystem_device_id = pdev->subsystem_device;
+
+ pci_read_config_byte(pdev, PCI_REVISION_ID, &hw->revision_id);
+
+ pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
+
+ /* set default ring sizes */
+ adapter->tx_ring_count = IGB_DEFAULT_TXD;
+ adapter->rx_ring_count = IGB_DEFAULT_RXD;
+
+ /* set default work limits */
+ adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;
+
+ adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
+ VLAN_HLEN;
+
+ /* Initialize the hardware-specific values */
+ if (e1000_setup_init_funcs(hw, TRUE)) {
+ dev_err(pci_dev_to_dev(pdev), "Hardware Initialization Failure\n");
+ return -EIO;
+ }
+
+ adapter->mac_table = kzalloc(sizeof(struct igb_mac_addr) *
+ hw->mac.rar_entry_count,
+ GFP_ATOMIC);
+
+ /* Setup and initialize a copy of the hw vlan table array */
+ adapter->shadow_vfta = kzalloc(sizeof(u32) * E1000_VFTA_ENTRIES,
+ GFP_ATOMIC);
+#ifdef NO_KNI
+ /* These calls may decrease the number of queues */
+ if (hw->mac.type < e1000_i210) {
+ igb_set_sriov_capability(adapter);
+ }
+
+ if (igb_init_interrupt_scheme(adapter, true)) {
+ dev_err(pci_dev_to_dev(pdev), "Unable to allocate memory for queues\n");
+ return -ENOMEM;
+ }
+
+ /* Explicitly disable IRQ since the NIC can be in any state. */
+ igb_irq_disable(adapter);
+
+ set_bit(__IGB_DOWN, &adapter->state);
+#endif
+ return 0;
+}
+
+/**
+ * igb_open - Called when a network interface is made active
+ * @netdev: network interface device structure
+ *
+ * Returns 0 on success, negative value on failure
+ *
+ * The open entry point is called when a network interface is made
+ * active by the system (IFF_UP). At this point all resources needed
+ * for transmit and receive operations are allocated, the interrupt
+ * handler is registered with the OS, the watchdog timer is started,
+ * and the stack is notified that the interface is ready.
+ **/
+static int __igb_open(struct net_device *netdev, bool resuming)
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+ struct e1000_hw *hw = &adapter->hw;
+#ifdef CONFIG_PM_RUNTIME
+ struct pci_dev *pdev = adapter->pdev;
+#endif /* CONFIG_PM_RUNTIME */
+ int err;
+ int i;
+
+ /* disallow open during test */
+ if (test_bit(__IGB_TESTING, &adapter->state)) {
+ WARN_ON(resuming);
+ return -EBUSY;
+ }
+
+#ifdef CONFIG_PM_RUNTIME
+ if (!resuming)
+ pm_runtime_get_sync(&pdev->dev);
+#endif /* CONFIG_PM_RUNTIME */
+
+ netif_carrier_off(netdev);
+
+ /* allocate transmit descriptors */
+ err = igb_setup_all_tx_resources(adapter);
+ if (err)
+ goto err_setup_tx;
+
+ /* allocate receive descriptors */
+ err = igb_setup_all_rx_resources(adapter);
+ if (err)
+ goto err_setup_rx;
+
+ igb_power_up_link(adapter);
+
+ /* before we allocate an interrupt, we must be ready to handle it.
+ * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
+ * as soon as we call pci_request_irq, so we have to setup our
+ * clean_rx handler before we do so. */
+ igb_configure(adapter);
+
+ err = igb_request_irq(adapter);
+ if (err)
+ goto err_req_irq;
+
+ /* Notify the stack of the actual queue counts. */
+ netif_set_real_num_tx_queues(netdev,
+ adapter->vmdq_pools ? 1 :
+ adapter->num_tx_queues);
+
+ err = netif_set_real_num_rx_queues(netdev,
+ adapter->vmdq_pools ? 1 :
+ adapter->num_rx_queues);
+ if (err)
+ goto err_set_queues;
+
+ /* From here on the code is the same as igb_up() */
+ clear_bit(__IGB_DOWN, &adapter->state);
+
+ for (i = 0; i < adapter->num_q_vectors; i++)
+ napi_enable(&(adapter->q_vector[i]->napi));
+ igb_configure_lli(adapter);
+
+ /* Clear any pending interrupts. */
+ E1000_READ_REG(hw, E1000_ICR);
+
+ igb_irq_enable(adapter);
+
+ /* notify VFs that reset has been completed */
+ if (adapter->vfs_allocated_count) {
+ u32 reg_data = E1000_READ_REG(hw, E1000_CTRL_EXT);
+ reg_data |= E1000_CTRL_EXT_PFRSTD;
+ E1000_WRITE_REG(hw, E1000_CTRL_EXT, reg_data);
+ }
+
+ netif_tx_start_all_queues(netdev);
+
+ if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA)
+ schedule_work(&adapter->dma_err_task);
+
+ /* start the watchdog. */
+ hw->mac.get_link_status = 1;
+ schedule_work(&adapter->watchdog_task);
+
+ return E1000_SUCCESS;
+
+err_set_queues:
+ igb_free_irq(adapter);
+err_req_irq:
+ igb_release_hw_control(adapter);
+ igb_power_down_link(adapter);
+ igb_free_all_rx_resources(adapter);
+err_setup_rx:
+ igb_free_all_tx_resources(adapter);
+err_setup_tx:
+ igb_reset(adapter);
+
+#ifdef CONFIG_PM_RUNTIME
+ if (!resuming)
+ pm_runtime_put(&pdev->dev);
+#endif /* CONFIG_PM_RUNTIME */
+
+ return err;
+}
+
+static int igb_open(struct net_device *netdev)
+{
+ return __igb_open(netdev, false);
+}
+
+/**
+ * igb_close - Disables a network interface
+ * @netdev: network interface device structure
+ *
+ * Returns 0, this is not allowed to fail
+ *
+ * The close entry point is called when an interface is de-activated
+ * by the OS. The hardware is still under the driver's control, but
+ * needs to be disabled. A global MAC reset is issued to stop the
+ * hardware, and all transmit and receive resources are freed.
+ **/
+static int __igb_close(struct net_device *netdev, bool suspending)
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+#ifdef CONFIG_PM_RUNTIME
+ struct pci_dev *pdev = adapter->pdev;
+#endif /* CONFIG_PM_RUNTIME */
+
+ WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
+
+#ifdef CONFIG_PM_RUNTIME
+ if (!suspending)
+ pm_runtime_get_sync(&pdev->dev);
+#endif /* CONFIG_PM_RUNTIME */
+
+ igb_down(adapter);
+
+ igb_release_hw_control(adapter);
+
+ igb_free_irq(adapter);
+
+ igb_free_all_tx_resources(adapter);
+ igb_free_all_rx_resources(adapter);
+
+#ifdef CONFIG_PM_RUNTIME
+ if (!suspending)
+ pm_runtime_put_sync(&pdev->dev);
+#endif /* CONFIG_PM_RUNTIME */
+
+ return 0;
+}
+
+static int igb_close(struct net_device *netdev)
+{
+ return __igb_close(netdev, false);
+}
+
+/**
+ * igb_setup_tx_resources - allocate Tx resources (Descriptors)
+ * @tx_ring: tx descriptor ring (for a specific queue) to setup
+ *
+ * Return 0 on success, negative on failure
+ **/
+int igb_setup_tx_resources(struct igb_ring *tx_ring)
+{
+ struct device *dev = tx_ring->dev;
+ int size;
+
+ size = sizeof(struct igb_tx_buffer) * tx_ring->count;
+ tx_ring->tx_buffer_info = vzalloc(size);
+ if (!tx_ring->tx_buffer_info)
+ goto err;
+
+ /* round up to nearest 4K */
+ tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
+ tx_ring->size = ALIGN(tx_ring->size, 4096);
+
+ tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
+ &tx_ring->dma, GFP_KERNEL);
+
+ if (!tx_ring->desc)
+ goto err;
+
+ tx_ring->next_to_use = 0;
+ tx_ring->next_to_clean = 0;
+
+ return 0;
+
+err:
+ vfree(tx_ring->tx_buffer_info);
+ dev_err(dev,
+ "Unable to allocate memory for the transmit descriptor ring\n");
+ return -ENOMEM;
+}
+
+/**
+ * igb_setup_all_tx_resources - wrapper to allocate Tx resources
+ * (Descriptors) for all queues
+ * @adapter: board private structure
+ *
+ * Return 0 on success, negative on failure
+ **/
+static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
+{
+ struct pci_dev *pdev = adapter->pdev;
+ int i, err = 0;
+
+ for (i = 0; i < adapter->num_tx_queues; i++) {
+ err = igb_setup_tx_resources(adapter->tx_ring[i]);
+ if (err) {
+ dev_err(pci_dev_to_dev(pdev),
+ "Allocation for Tx Queue %u failed\n", i);
+ for (i--; i >= 0; i--)
+ igb_free_tx_resources(adapter->tx_ring[i]);
+ break;
+ }
+ }
+
+ return err;
+}
+
+/**
+ * igb_setup_tctl - configure the transmit control registers
+ * @adapter: Board private structure
+ **/
+void igb_setup_tctl(struct igb_adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ u32 tctl;
+
+ /* disable queue 0 which is enabled by default on 82575 and 82576 */
+ E1000_WRITE_REG(hw, E1000_TXDCTL(0), 0);
+
+ /* Program the Transmit Control Register */
+ tctl = E1000_READ_REG(hw, E1000_TCTL);
+ tctl &= ~E1000_TCTL_CT;
+ tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
+ (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
+
+ e1000_config_collision_dist(hw);
+
+ /* Enable transmits */
+ tctl |= E1000_TCTL_EN;
+
+ E1000_WRITE_REG(hw, E1000_TCTL, tctl);
+}
+
+static u32 igb_tx_wthresh(struct igb_adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ switch (hw->mac.type) {
+ case e1000_i354:
+ return 4;
+ case e1000_82576:
+ if (adapter->msix_entries)
+ return 1;
+ default:
+ break;
+ }
+
+ return 16;
+}
+
+/**
+ * igb_configure_tx_ring - Configure transmit ring after Reset
+ * @adapter: board private structure
+ * @ring: tx ring to configure
+ *
+ * Configure a transmit ring after a reset.
+ **/
+void igb_configure_tx_ring(struct igb_adapter *adapter,
+ struct igb_ring *ring)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ u32 txdctl = 0;
+ u64 tdba = ring->dma;
+ int reg_idx = ring->reg_idx;
+
+ /* disable the queue */
+ E1000_WRITE_REG(hw, E1000_TXDCTL(reg_idx), 0);
+ E1000_WRITE_FLUSH(hw);
+ mdelay(10);
+
+ E1000_WRITE_REG(hw, E1000_TDLEN(reg_idx),
+ ring->count * sizeof(union e1000_adv_tx_desc));
+ E1000_WRITE_REG(hw, E1000_TDBAL(reg_idx),
+ tdba & 0x00000000ffffffffULL);
+ E1000_WRITE_REG(hw, E1000_TDBAH(reg_idx), tdba >> 32);
+
+ ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
+ E1000_WRITE_REG(hw, E1000_TDH(reg_idx), 0);
+ writel(0, ring->tail);
+
+ txdctl |= IGB_TX_PTHRESH;
+ txdctl |= IGB_TX_HTHRESH << 8;
+ txdctl |= igb_tx_wthresh(adapter) << 16;
+
+ txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
+ E1000_WRITE_REG(hw, E1000_TXDCTL(reg_idx), txdctl);
+}
+
+/**
+ * igb_configure_tx - Configure transmit Unit after Reset
+ * @adapter: board private structure
+ *
+ * Configure the Tx unit of the MAC after a reset.
+ **/
+static void igb_configure_tx(struct igb_adapter *adapter)
+{
+ int i;
+
+ for (i = 0; i < adapter->num_tx_queues; i++)
+ igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
+}
+
+/**
+ * igb_setup_rx_resources - allocate Rx resources (Descriptors)
+ * @rx_ring: rx descriptor ring (for a specific queue) to setup
+ *
+ * Returns 0 on success, negative on failure
+ **/
+int igb_setup_rx_resources(struct igb_ring *rx_ring)
+{
+ struct device *dev = rx_ring->dev;
+ int size, desc_len;
+
+ size = sizeof(struct igb_rx_buffer) * rx_ring->count;
+ rx_ring->rx_buffer_info = vzalloc(size);
+ if (!rx_ring->rx_buffer_info)
+ goto err;
+
+ desc_len = sizeof(union e1000_adv_rx_desc);
+
+ /* Round up to nearest 4K */
+ rx_ring->size = rx_ring->count * desc_len;
+ rx_ring->size = ALIGN(rx_ring->size, 4096);
+
+ rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
+ &rx_ring->dma, GFP_KERNEL);
+
+ if (!rx_ring->desc)
+ goto err;
+
+ rx_ring->next_to_alloc = 0;
+ rx_ring->next_to_clean = 0;
+ rx_ring->next_to_use = 0;
+
+ return 0;
+
+err:
+ vfree(rx_ring->rx_buffer_info);
+ rx_ring->rx_buffer_info = NULL;
+ dev_err(dev, "Unable to allocate memory for the receive descriptor"
+ " ring\n");
+ return -ENOMEM;
+}
+
+/**
+ * igb_setup_all_rx_resources - wrapper to allocate Rx resources
+ * (Descriptors) for all queues
+ * @adapter: board private structure
+ *
+ * Return 0 on success, negative on failure
+ **/
+static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
+{
+ struct pci_dev *pdev = adapter->pdev;
+ int i, err = 0;
+
+ for (i = 0; i < adapter->num_rx_queues; i++) {
+ err = igb_setup_rx_resources(adapter->rx_ring[i]);
+ if (err) {
+ dev_err(pci_dev_to_dev(pdev),
+ "Allocation for Rx Queue %u failed\n", i);
+ for (i--; i >= 0; i--)
+ igb_free_rx_resources(adapter->rx_ring[i]);
+ break;
+ }
+ }
+
+ return err;
+}
+
+/**
+ * igb_setup_mrqc - configure the multiple receive queue control registers
+ * @adapter: Board private structure
+ **/
+static void igb_setup_mrqc(struct igb_adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ u32 mrqc, rxcsum;
+ u32 j, num_rx_queues, shift = 0, shift2 = 0;
+ static const u32 rsskey[10] = { 0xDA565A6D, 0xC20E5B25, 0x3D256741,
+ 0xB08FA343, 0xCB2BCAD0, 0xB4307BAE,
+ 0xA32DCB77, 0x0CF23080, 0x3BB7426A,
+ 0xFA01ACBE };
+
+ /* Fill out hash function seeds */
+ for (j = 0; j < 10; j++)
+ E1000_WRITE_REG(hw, E1000_RSSRK(j), rsskey[j]);
+
+ num_rx_queues = adapter->rss_queues;
+
+ /* 82575 and 82576 supports 2 RSS queues for VMDq */
+ switch (hw->mac.type) {
+ case e1000_82575:
+ if (adapter->vmdq_pools) {
+ shift = 2;
+ shift2 = 6;
+ break;
+ }
+ shift = 6;
+ break;
+ case e1000_82576:
+ /* 82576 supports 2 RSS queues for SR-IOV */
+ if (adapter->vfs_allocated_count || adapter->vmdq_pools) {
+ shift = 3;
+ num_rx_queues = 2;
+ }
+ break;
+ default:
+ break;
+ }
+
+ /*
+ * Populate the redirection table 4 entries at a time. To do this
+ * we are generating the results for n and n+2 and then interleaving
+ * those with the results with n+1 and n+3.
+ */
+ for (j = 0; j < 32; j++) {
+ /* first pass generates n and n+2 */
+ u32 base = ((j * 0x00040004) + 0x00020000) * num_rx_queues;
+ u32 reta = (base & 0x07800780) >> (7 - shift);
+
+ /* second pass generates n+1 and n+3 */
+ base += 0x00010001 * num_rx_queues;
+ reta |= (base & 0x07800780) << (1 + shift);
+
+ /* generate 2nd table for 82575 based parts */
+ if (shift2)
+ reta |= (0x01010101 * num_rx_queues) << shift2;
+
+ E1000_WRITE_REG(hw, E1000_RETA(j), reta);
+ }
+
+ /*
+ * Disable raw packet checksumming so that RSS hash is placed in
+ * descriptor on writeback. No need to enable TCP/UDP/IP checksum
+ * offloads as they are enabled by default
+ */
+ rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
+ rxcsum |= E1000_RXCSUM_PCSD;
+
+ if (adapter->hw.mac.type >= e1000_82576)
+ /* Enable Receive Checksum Offload for SCTP */
+ rxcsum |= E1000_RXCSUM_CRCOFL;
+
+ /* Don't need to set TUOFL or IPOFL, they default to 1 */
+ E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
+
+ /* Generate RSS hash based on packet types, TCP/UDP
+ * port numbers and/or IPv4/v6 src and dst addresses
+ */
+ mrqc = E1000_MRQC_RSS_FIELD_IPV4 |
+ E1000_MRQC_RSS_FIELD_IPV4_TCP |
+ E1000_MRQC_RSS_FIELD_IPV6 |
+ E1000_MRQC_RSS_FIELD_IPV6_TCP |
+ E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
+
+ if (adapter->flags & IGB_FLAG_RSS_FIELD_IPV4_UDP)
+ mrqc |= E1000_MRQC_RSS_FIELD_IPV4_UDP;
+ if (adapter->flags & IGB_FLAG_RSS_FIELD_IPV6_UDP)
+ mrqc |= E1000_MRQC_RSS_FIELD_IPV6_UDP;
+
+ /* If VMDq is enabled then we set the appropriate mode for that, else
+ * we default to RSS so that an RSS hash is calculated per packet even
+ * if we are only using one queue */
+ if (adapter->vfs_allocated_count || adapter->vmdq_pools) {
+ if (hw->mac.type > e1000_82575) {
+ /* Set the default pool for the PF's first queue */
+ u32 vtctl = E1000_READ_REG(hw, E1000_VT_CTL);
+ vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
+ E1000_VT_CTL_DISABLE_DEF_POOL);
+ vtctl |= adapter->vfs_allocated_count <<
+ E1000_VT_CTL_DEFAULT_POOL_SHIFT;
+ E1000_WRITE_REG(hw, E1000_VT_CTL, vtctl);
+ } else if (adapter->rss_queues > 1) {
+ /* set default queue for pool 1 to queue 2 */
+ E1000_WRITE_REG(hw, E1000_VT_CTL,
+ adapter->rss_queues << 7);
+ }
+ if (adapter->rss_queues > 1)
+ mrqc |= E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
+ else
+ mrqc |= E1000_MRQC_ENABLE_VMDQ;
+ } else {
+ mrqc |= E1000_MRQC_ENABLE_RSS_4Q;
+ }
+ igb_vmm_control(adapter);
+
+ E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
+}
+
+/**
+ * igb_setup_rctl - configure the receive control registers
+ * @adapter: Board private structure
+ **/
+void igb_setup_rctl(struct igb_adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ u32 rctl;
+
+ rctl = E1000_READ_REG(hw, E1000_RCTL);
+
+ rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
+ rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
+
+ rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
+ (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
+
+ /*
+ * enable stripping of CRC. It's unlikely this will break BMC
+ * redirection as it did with e1000. Newer features require
+ * that the HW strips the CRC.
+ */
+ rctl |= E1000_RCTL_SECRC;
+
+ /* disable store bad packets and clear size bits. */
+ rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
+
+ /* enable LPE to prevent packets larger than max_frame_size */
+ rctl |= E1000_RCTL_LPE;
+
+ /* disable queue 0 to prevent tail write w/o re-config */
+ E1000_WRITE_REG(hw, E1000_RXDCTL(0), 0);
+
+ /* Attention!!! For SR-IOV PF driver operations you must enable
+ * queue drop for all VF and PF queues to prevent head of line blocking
+ * if an un-trusted VF does not provide descriptors to hardware.
+ */
+ if (adapter->vfs_allocated_count) {
+ /* set all queue drop enable bits */
+ E1000_WRITE_REG(hw, E1000_QDE, ALL_QUEUES);
+ }
+
+ E1000_WRITE_REG(hw, E1000_RCTL, rctl);
+}
+
+static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
+ int vfn)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ u32 vmolr;
+
+ /* if it isn't the PF check to see if VFs are enabled and
+ * increase the size to support vlan tags */
+ if (vfn < adapter->vfs_allocated_count &&
+ adapter->vf_data[vfn].vlans_enabled)
+ size += VLAN_HLEN;
+
+#ifdef CONFIG_IGB_VMDQ_NETDEV
+ if (vfn >= adapter->vfs_allocated_count) {
+ int queue = vfn - adapter->vfs_allocated_count;
+ struct igb_vmdq_adapter *vadapter;
+
+ vadapter = netdev_priv(adapter->vmdq_netdev[queue-1]);
+ if (vadapter->vlgrp)
+ size += VLAN_HLEN;
+ }
+#endif
+ vmolr = E1000_READ_REG(hw, E1000_VMOLR(vfn));
+ vmolr &= ~E1000_VMOLR_RLPML_MASK;
+ vmolr |= size | E1000_VMOLR_LPE;
+ E1000_WRITE_REG(hw, E1000_VMOLR(vfn), vmolr);
+
+ return 0;
+}
+
+/**
+ * igb_rlpml_set - set maximum receive packet size
+ * @adapter: board private structure
+ *
+ * Configure maximum receivable packet size.
+ **/
+static void igb_rlpml_set(struct igb_adapter *adapter)
+{
+ u32 max_frame_size = adapter->max_frame_size;
+ struct e1000_hw *hw = &adapter->hw;
+ u16 pf_id = adapter->vfs_allocated_count;
+
+ if (adapter->vmdq_pools && hw->mac.type != e1000_82575) {
+ int i;
+ for (i = 0; i < adapter->vmdq_pools; i++)
+ igb_set_vf_rlpml(adapter, max_frame_size, pf_id + i);
+ /*
+ * If we're in VMDQ or SR-IOV mode, then set global RLPML
+ * to our max jumbo frame size, in case we need to enable
+ * jumbo frames on one of the rings later.
+ * This will not pass over-length frames into the default
+ * queue because it's gated by the VMOLR.RLPML.
+ */
+ max_frame_size = MAX_JUMBO_FRAME_SIZE;
+ }
+ /* Set VF RLPML for the PF device. */
+ if (adapter->vfs_allocated_count)
+ igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
+
+ E1000_WRITE_REG(hw, E1000_RLPML, max_frame_size);
+}
+
+static inline void igb_set_vf_vlan_strip(struct igb_adapter *adapter,
+ int vfn, bool enable)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ u32 val;
+ void __iomem *reg;
+
+ if (hw->mac.type < e1000_82576)
+ return;
+
+ if (hw->mac.type == e1000_i350)
+ reg = hw->hw_addr + E1000_DVMOLR(vfn);
+ else
+ reg = hw->hw_addr + E1000_VMOLR(vfn);
+
+ val = readl(reg);
+ if (enable)
+ val |= E1000_VMOLR_STRVLAN;
+ else
+ val &= ~(E1000_VMOLR_STRVLAN);
+ writel(val, reg);
+}
+static inline void igb_set_vmolr(struct igb_adapter *adapter,
+ int vfn, bool aupe)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ u32 vmolr;
+
+ /*
+ * This register exists only on 82576 and newer so if we are older then
+ * we should exit and do nothing
+ */
+ if (hw->mac.type < e1000_82576)
+ return;
+
+ vmolr = E1000_READ_REG(hw, E1000_VMOLR(vfn));
+
+ if (aupe)
+ vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */
+ else
+ vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
+
+ /* clear all bits that might not be set */
+ vmolr &= ~E1000_VMOLR_RSSE;
+
+ if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
+ vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
+
+ vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */
+ vmolr |= E1000_VMOLR_LPE; /* Accept long packets */
+
+ E1000_WRITE_REG(hw, E1000_VMOLR(vfn), vmolr);
+}
+
+/**
+ * igb_configure_rx_ring - Configure a receive ring after Reset
+ * @adapter: board private structure
+ * @ring: receive ring to be configured
+ *
+ * Configure the Rx unit of the MAC after a reset.
+ **/
+void igb_configure_rx_ring(struct igb_adapter *adapter,
+ struct igb_ring *ring)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ u64 rdba = ring->dma;
+ int reg_idx = ring->reg_idx;
+ u32 srrctl = 0, rxdctl = 0;
+
+#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
+ /*
+ * RLPML prevents us from receiving a frame larger than max_frame so
+ * it is safe to just set the rx_buffer_len to max_frame without the
+ * risk of an skb over panic.
+ */
+ ring->rx_buffer_len = max_t(u32, adapter->max_frame_size,
+ MAXIMUM_ETHERNET_VLAN_SIZE);
+
+#endif
+ /* disable the queue */
+ E1000_WRITE_REG(hw, E1000_RXDCTL(reg_idx), 0);
+
+ /* Set DMA base address registers */
+ E1000_WRITE_REG(hw, E1000_RDBAL(reg_idx),
+ rdba & 0x00000000ffffffffULL);
+ E1000_WRITE_REG(hw, E1000_RDBAH(reg_idx), rdba >> 32);
+ E1000_WRITE_REG(hw, E1000_RDLEN(reg_idx),
+ ring->count * sizeof(union e1000_adv_rx_desc));
+
+ /* initialize head and tail */
+ ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
+ E1000_WRITE_REG(hw, E1000_RDH(reg_idx), 0);
+ writel(0, ring->tail);
+
+ /* reset next-to- use/clean to place SW in sync with hardwdare */
+ ring->next_to_clean = 0;
+ ring->next_to_use = 0;
+#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
+ ring->next_to_alloc = 0;
+
+#endif
+ /* set descriptor configuration */
+#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
+ srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
+ srrctl |= IGB_RX_BUFSZ >> E1000_SRRCTL_BSIZEPKT_SHIFT;
+#else /* CONFIG_IGB_DISABLE_PACKET_SPLIT */
+ srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
+ E1000_SRRCTL_BSIZEPKT_SHIFT;
+#endif /* CONFIG_IGB_DISABLE_PACKET_SPLIT */
+ srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
+#ifdef HAVE_PTP_1588_CLOCK
+ if (hw->mac.type >= e1000_82580)
+ srrctl |= E1000_SRRCTL_TIMESTAMP;
+#endif /* HAVE_PTP_1588_CLOCK */
+ /*
+ * We should set the drop enable bit if:
+ * SR-IOV is enabled
+ * or
+ * Flow Control is disabled and number of RX queues > 1
+ *
+ * This allows us to avoid head of line blocking for security
+ * and performance reasons.
+ */
+ if (adapter->vfs_allocated_count ||
+ (adapter->num_rx_queues > 1 &&
+ (hw->fc.requested_mode == e1000_fc_none ||
+ hw->fc.requested_mode == e1000_fc_rx_pause)))
+ srrctl |= E1000_SRRCTL_DROP_EN;
+
+ E1000_WRITE_REG(hw, E1000_SRRCTL(reg_idx), srrctl);
+
+ /* set filtering for VMDQ pools */
+ igb_set_vmolr(adapter, reg_idx & 0x7, true);
+
+ rxdctl |= IGB_RX_PTHRESH;
+ rxdctl |= IGB_RX_HTHRESH << 8;
+ rxdctl |= IGB_RX_WTHRESH << 16;
+
+ /* enable receive descriptor fetching */
+ rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
+ E1000_WRITE_REG(hw, E1000_RXDCTL(reg_idx), rxdctl);
+}
+
+/**
+ * igb_configure_rx - Configure receive Unit after Reset
+ * @adapter: board private structure
+ *
+ * Configure the Rx unit of the MAC after a reset.
+ **/
+static void igb_configure_rx(struct igb_adapter *adapter)
+{
+ int i;
+
+ /* set UTA to appropriate mode */
+ igb_set_uta(adapter);
+
+ igb_full_sync_mac_table(adapter);
+ /* Setup the HW Rx Head and Tail Descriptor Pointers and
+ * the Base and Length of the Rx Descriptor Ring */
+ for (i = 0; i < adapter->num_rx_queues; i++)
+ igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
+}
+
+/**
+ * igb_free_tx_resources - Free Tx Resources per Queue
+ * @tx_ring: Tx descriptor ring for a specific queue
+ *
+ * Free all transmit software resources
+ **/
+void igb_free_tx_resources(struct igb_ring *tx_ring)
+{
+ igb_clean_tx_ring(tx_ring);
+
+ vfree(tx_ring->tx_buffer_info);
+ tx_ring->tx_buffer_info = NULL;
+
+ /* if not set, then don't free */
+ if (!tx_ring->desc)
+ return;
+
+ dma_free_coherent(tx_ring->dev, tx_ring->size,
+ tx_ring->desc, tx_ring->dma);
+
+ tx_ring->desc = NULL;
+}
+
+/**
+ * igb_free_all_tx_resources - Free Tx Resources for All Queues
+ * @adapter: board private structure
+ *
+ * Free all transmit software resources
+ **/
+static void igb_free_all_tx_resources(struct igb_adapter *adapter)
+{
+ int i;
+
+ for (i = 0; i < adapter->num_tx_queues; i++)
+ igb_free_tx_resources(adapter->tx_ring[i]);
+}
+
+void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
+ struct igb_tx_buffer *tx_buffer)
+{
+ if (tx_buffer->skb) {
+ dev_kfree_skb_any(tx_buffer->skb);
+ if (dma_unmap_len(tx_buffer, len))
+ dma_unmap_single(ring->dev,
+ dma_unmap_addr(tx_buffer, dma),
+ dma_unmap_len(tx_buffer, len),
+ DMA_TO_DEVICE);
+ } else if (dma_unmap_len(tx_buffer, len)) {
+ dma_unmap_page(ring->dev,
+ dma_unmap_addr(tx_buffer, dma),
+ dma_unmap_len(tx_buffer, len),
+ DMA_TO_DEVICE);
+ }
+ tx_buffer->next_to_watch = NULL;
+ tx_buffer->skb = NULL;
+ dma_unmap_len_set(tx_buffer, len, 0);
+ /* buffer_info must be completely set up in the transmit path */
+}
+
+/**
+ * igb_clean_tx_ring - Free Tx Buffers
+ * @tx_ring: ring to be cleaned
+ **/
+static void igb_clean_tx_ring(struct igb_ring *tx_ring)
+{
+ struct igb_tx_buffer *buffer_info;
+ unsigned long size;
+ u16 i;
+
+ if (!tx_ring->tx_buffer_info)
+ return;
+ /* Free all the Tx ring sk_buffs */
+
+ for (i = 0; i < tx_ring->count; i++) {
+ buffer_info = &tx_ring->tx_buffer_info[i];
+ igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
+ }
+
+ netdev_tx_reset_queue(txring_txq(tx_ring));
+
+ size = sizeof(struct igb_tx_buffer) * tx_ring->count;
+ memset(tx_ring->tx_buffer_info, 0, size);
+
+ /* Zero out the descriptor ring */
+ memset(tx_ring->desc, 0, tx_ring->size);
+
+ tx_ring->next_to_use = 0;
+ tx_ring->next_to_clean = 0;
+}
+
+/**
+ * igb_clean_all_tx_rings - Free Tx Buffers for all queues
+ * @adapter: board private structure
+ **/
+static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
+{
+ int i;
+
+ for (i = 0; i < adapter->num_tx_queues; i++)
+ igb_clean_tx_ring(adapter->tx_ring[i]);
+}
+
+/**
+ * igb_free_rx_resources - Free Rx Resources
+ * @rx_ring: ring to clean the resources from
+ *
+ * Free all receive software resources
+ **/
+void igb_free_rx_resources(struct igb_ring *rx_ring)
+{
+ igb_clean_rx_ring(rx_ring);
+
+ vfree(rx_ring->rx_buffer_info);
+ rx_ring->rx_buffer_info = NULL;
+
+ /* if not set, then don't free */
+ if (!rx_ring->desc)
+ return;
+
+ dma_free_coherent(rx_ring->dev, rx_ring->size,
+ rx_ring->desc, rx_ring->dma);
+
+ rx_ring->desc = NULL;
+}
+
+/**
+ * igb_free_all_rx_resources - Free Rx Resources for All Queues
+ * @adapter: board private structure
+ *
+ * Free all receive software resources
+ **/
+static void igb_free_all_rx_resources(struct igb_adapter *adapter)
+{
+ int i;
+
+ for (i = 0; i < adapter->num_rx_queues; i++)
+ igb_free_rx_resources(adapter->rx_ring[i]);
+}
+
+/**
+ * igb_clean_rx_ring - Free Rx Buffers per Queue
+ * @rx_ring: ring to free buffers from
+ **/
+void igb_clean_rx_ring(struct igb_ring *rx_ring)
+{
+ unsigned long size;
+ u16 i;
+
+ if (!rx_ring->rx_buffer_info)
+ return;
+
+#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
+ if (rx_ring->skb)
+ dev_kfree_skb(rx_ring->skb);
+ rx_ring->skb = NULL;
+
+#endif
+ /* Free all the Rx ring sk_buffs */
+ for (i = 0; i < rx_ring->count; i++) {
+ struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
+#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
+ if (buffer_info->dma) {
+ dma_unmap_single(rx_ring->dev,
+ buffer_info->dma,
+ rx_ring->rx_buffer_len,
+ DMA_FROM_DEVICE);
+ buffer_info->dma = 0;
+ }
+
+ if (buffer_info->skb) {
+ dev_kfree_skb(buffer_info->skb);
+ buffer_info->skb = NULL;
+ }
+#else
+ if (!buffer_info->page)
+ continue;
+
+ dma_unmap_page(rx_ring->dev,
+ buffer_info->dma,
+ PAGE_SIZE,
+ DMA_FROM_DEVICE);
+ __free_page(buffer_info->page);
+
+ buffer_info->page = NULL;
+#endif
+ }
+
+ size = sizeof(struct igb_rx_buffer) * rx_ring->count;
+ memset(rx_ring->rx_buffer_info, 0, size);
+
+ /* Zero out the descriptor ring */
+ memset(rx_ring->desc, 0, rx_ring->size);
+
+ rx_ring->next_to_alloc = 0;
+ rx_ring->next_to_clean = 0;
+ rx_ring->next_to_use = 0;
+}
+
+/**
+ * igb_clean_all_rx_rings - Free Rx Buffers for all queues
+ * @adapter: board private structure
+ **/
+static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
+{
+ int i;
+
+ for (i = 0; i < adapter->num_rx_queues; i++)
+ igb_clean_rx_ring(adapter->rx_ring[i]);
+}
+
+/**
+ * igb_set_mac - Change the Ethernet Address of the NIC
+ * @netdev: network interface device structure
+ * @p: pointer to an address structure
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int igb_set_mac(struct net_device *netdev, void *p)
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+ struct e1000_hw *hw = &adapter->hw;
+ struct sockaddr *addr = p;
+
+ if (!is_valid_ether_addr(addr->sa_data))
+ return -EADDRNOTAVAIL;
+
+ igb_del_mac_filter(adapter, hw->mac.addr,
+ adapter->vfs_allocated_count);
+ memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
+ memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
+
+ /* set the correct pool for the new PF MAC address in entry 0 */
+ return igb_add_mac_filter(adapter, hw->mac.addr,
+ adapter->vfs_allocated_count);
+}
+
+/**
+ * igb_write_mc_addr_list - write multicast addresses to MTA
+ * @netdev: network interface device structure
+ *
+ * Writes multicast address list to the MTA hash table.
+ * Returns: -ENOMEM on failure
+ * 0 on no addresses written
+ * X on writing X addresses to MTA
+ **/
+int igb_write_mc_addr_list(struct net_device *netdev)
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+ struct e1000_hw *hw = &adapter->hw;
+#ifdef NETDEV_HW_ADDR_T_MULTICAST
+ struct netdev_hw_addr *ha;
+#else
+ struct dev_mc_list *ha;
+#endif
+ u8 *mta_list;
+ int i, count;
+#ifdef CONFIG_IGB_VMDQ_NETDEV
+ int vm;
+#endif
+ count = netdev_mc_count(netdev);
+#ifdef CONFIG_IGB_VMDQ_NETDEV
+ for (vm = 1; vm < adapter->vmdq_pools; vm++) {
+ if (!adapter->vmdq_netdev[vm])
+ break;
+ if (!netif_running(adapter->vmdq_netdev[vm]))
+ continue;
+ count += netdev_mc_count(adapter->vmdq_netdev[vm]);
+ }
+#endif
+
+ if (!count) {
+ e1000_update_mc_addr_list(hw, NULL, 0);
+ return 0;
+ }
+ mta_list = kzalloc(count * 6, GFP_ATOMIC);
+ if (!mta_list)
+ return -ENOMEM;
+
+ /* The shared function expects a packed array of only addresses. */
+ i = 0;
+ netdev_for_each_mc_addr(ha, netdev)
+#ifdef NETDEV_HW_ADDR_T_MULTICAST
+ memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
+#else
+ memcpy(mta_list + (i++ * ETH_ALEN), ha->dmi_addr, ETH_ALEN);
+#endif
+#ifdef CONFIG_IGB_VMDQ_NETDEV
+ for (vm = 1; vm < adapter->vmdq_pools; vm++) {
+ if (!adapter->vmdq_netdev[vm])
+ break;
+ if (!netif_running(adapter->vmdq_netdev[vm]) ||
+ !netdev_mc_count(adapter->vmdq_netdev[vm]))
+ continue;
+ netdev_for_each_mc_addr(ha, adapter->vmdq_netdev[vm])
+#ifdef NETDEV_HW_ADDR_T_MULTICAST
+ memcpy(mta_list + (i++ * ETH_ALEN),
+ ha->addr, ETH_ALEN);
+#else
+ memcpy(mta_list + (i++ * ETH_ALEN),
+ ha->dmi_addr, ETH_ALEN);
+#endif
+ }
+#endif
+ e1000_update_mc_addr_list(hw, mta_list, i);
+ kfree(mta_list);
+
+ return count;
+}
+
+void igb_rar_set(struct igb_adapter *adapter, u32 index)
+{
+ u32 rar_low, rar_high;
+ struct e1000_hw *hw = &adapter->hw;
+ u8 *addr = adapter->mac_table[index].addr;
+ /* HW expects these in little endian so we reverse the byte order
+ * from network order (big endian) to little endian
+ */
+ rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
+ ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
+ rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
+
+ /* Indicate to hardware the Address is Valid. */
+ if (adapter->mac_table[index].state & IGB_MAC_STATE_IN_USE)
+ rar_high |= E1000_RAH_AV;
+
+ if (hw->mac.type == e1000_82575)
+ rar_high |= E1000_RAH_POOL_1 * adapter->mac_table[index].queue;
+ else
+ rar_high |= E1000_RAH_POOL_1 << adapter->mac_table[index].queue;
+
+ E1000_WRITE_REG(hw, E1000_RAL(index), rar_low);
+ E1000_WRITE_FLUSH(hw);
+ E1000_WRITE_REG(hw, E1000_RAH(index), rar_high);
+ E1000_WRITE_FLUSH(hw);
+}
+
+void igb_full_sync_mac_table(struct igb_adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ int i;
+ for (i = 0; i < hw->mac.rar_entry_count; i++) {
+ igb_rar_set(adapter, i);
+ }
+}
+
+void igb_sync_mac_table(struct igb_adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ int i;
+ for (i = 0; i < hw->mac.rar_entry_count; i++) {
+ if (adapter->mac_table[i].state & IGB_MAC_STATE_MODIFIED)
+ igb_rar_set(adapter, i);
+ adapter->mac_table[i].state &= ~(IGB_MAC_STATE_MODIFIED);
+ }
+}
+
+int igb_available_rars(struct igb_adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ int i, count = 0;
+
+ for (i = 0; i < hw->mac.rar_entry_count; i++) {
+ if (adapter->mac_table[i].state == 0)
+ count++;
+ }
+ return count;
+}
+
+#ifdef HAVE_SET_RX_MODE
+/**
+ * igb_write_uc_addr_list - write unicast addresses to RAR table
+ * @netdev: network interface device structure
+ *
+ * Writes unicast address list to the RAR table.
+ * Returns: -ENOMEM on failure/insufficient address space
+ * 0 on no addresses written
+ * X on writing X addresses to the RAR table
+ **/
+static int igb_write_uc_addr_list(struct net_device *netdev)
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+ unsigned int vfn = adapter->vfs_allocated_count;
+ int count = 0;
+
+ /* return ENOMEM indicating insufficient memory for addresses */
+ if (netdev_uc_count(netdev) > igb_available_rars(adapter))
+ return -ENOMEM;
+ if (!netdev_uc_empty(netdev)) {
+#ifdef NETDEV_HW_ADDR_T_UNICAST
+ struct netdev_hw_addr *ha;
+#else
+ struct dev_mc_list *ha;
+#endif
+ netdev_for_each_uc_addr(ha, netdev) {
+#ifdef NETDEV_HW_ADDR_T_UNICAST
+ igb_del_mac_filter(adapter, ha->addr, vfn);
+ igb_add_mac_filter(adapter, ha->addr, vfn);
+#else
+ igb_del_mac_filter(adapter, ha->da_addr, vfn);
+ igb_add_mac_filter(adapter, ha->da_addr, vfn);
+#endif
+ count++;
+ }
+ }
+ return count;
+}
+
+#endif /* HAVE_SET_RX_MODE */
+/**
+ * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
+ * @netdev: network interface device structure
+ *
+ * The set_rx_mode entry point is called whenever the unicast or multicast
+ * address lists or the network interface flags are updated. This routine is
+ * responsible for configuring the hardware for proper unicast, multicast,
+ * promiscuous mode, and all-multi behavior.
+ **/
+static void igb_set_rx_mode(struct net_device *netdev)
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+ struct e1000_hw *hw = &adapter->hw;
+ unsigned int vfn = adapter->vfs_allocated_count;
+ u32 rctl, vmolr = 0;
+ int count;
+
+ /* Check for Promiscuous and All Multicast modes */
+ rctl = E1000_READ_REG(hw, E1000_RCTL);
+
+ /* clear the effected bits */
+ rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
+
+ if (netdev->flags & IFF_PROMISC) {
+ rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
+ vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
+ /* retain VLAN HW filtering if in VT mode */
+ if (adapter->vfs_allocated_count || adapter->vmdq_pools)
+ rctl |= E1000_RCTL_VFE;
+ } else {
+ if (netdev->flags & IFF_ALLMULTI) {
+ rctl |= E1000_RCTL_MPE;
+ vmolr |= E1000_VMOLR_MPME;
+ } else {
+ /*
+ * Write addresses to the MTA, if the attempt fails
+ * then we should just turn on promiscuous mode so
+ * that we can at least receive multicast traffic
+ */
+ count = igb_write_mc_addr_list(netdev);
+ if (count < 0) {
+ rctl |= E1000_RCTL_MPE;
+ vmolr |= E1000_VMOLR_MPME;
+ } else if (count) {
+ vmolr |= E1000_VMOLR_ROMPE;
+ }
+ }
+#ifdef HAVE_SET_RX_MODE
+ /*
+ * Write addresses to available RAR registers, if there is not
+ * sufficient space to store all the addresses then enable
+ * unicast promiscuous mode
+ */
+ count = igb_write_uc_addr_list(netdev);
+ if (count < 0) {
+ rctl |= E1000_RCTL_UPE;
+ vmolr |= E1000_VMOLR_ROPE;
+ }
+#endif /* HAVE_SET_RX_MODE */
+ rctl |= E1000_RCTL_VFE;
+ }
+ E1000_WRITE_REG(hw, E1000_RCTL, rctl);
+
+ /*
+ * In order to support SR-IOV and eventually VMDq it is necessary to set
+ * the VMOLR to enable the appropriate modes. Without this workaround
+ * we will have issues with VLAN tag stripping not being done for frames
+ * that are only arriving because we are the default pool
+ */
+ if (hw->mac.type < e1000_82576)
+ return;
+
+ vmolr |= E1000_READ_REG(hw, E1000_VMOLR(vfn)) &
+ ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
+ E1000_WRITE_REG(hw, E1000_VMOLR(vfn), vmolr);
+ igb_restore_vf_multicasts(adapter);
+}
+
+static void igb_check_wvbr(struct igb_adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ u32 wvbr = 0;
+
+ switch (hw->mac.type) {
+ case e1000_82576:
+ case e1000_i350:
+ if (!(wvbr = E1000_READ_REG(hw, E1000_WVBR)))
+ return;
+ break;
+ default:
+ break;
+ }
+
+ adapter->wvbr |= wvbr;
+}
+
+#define IGB_STAGGERED_QUEUE_OFFSET 8
+
+static void igb_spoof_check(struct igb_adapter *adapter)
+{
+ int j;
+
+ if (!adapter->wvbr)
+ return;
+
+ switch (adapter->hw.mac.type) {
+ case e1000_82576:
+ for (j = 0; j < adapter->vfs_allocated_count; j++) {
+ if (adapter->wvbr & (1 << j) ||
+ adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
+ DPRINTK(DRV, WARNING,
+ "Spoof event(s) detected on VF %d\n", j);
+ adapter->wvbr &=
+ ~((1 << j) |
+ (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
+ }
+ }
+ break;
+ case e1000_i350:
+ for (j = 0; j < adapter->vfs_allocated_count; j++) {
+ if (adapter->wvbr & (1 << j)) {
+ DPRINTK(DRV, WARNING,
+ "Spoof event(s) detected on VF %d\n", j);
+ adapter->wvbr &= ~(1 << j);
+ }
+ }
+ break;
+ default:
+ break;
+ }
+}
+
+/* Need to wait a few seconds after link up to get diagnostic information from
+ * the phy */
+static void igb_update_phy_info(unsigned long data)
+{
+ struct igb_adapter *adapter = (struct igb_adapter *) data;
+ e1000_get_phy_info(&adapter->hw);
+}
+
+/**
+ * igb_has_link - check shared code for link and determine up/down
+ * @adapter: pointer to driver private info
+ **/
+bool igb_has_link(struct igb_adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ bool link_active = FALSE;
+
+ /* get_link_status is set on LSC (link status) interrupt or
+ * rx sequence error interrupt. get_link_status will stay
+ * false until the e1000_check_for_link establishes link
+ * for copper adapters ONLY
+ */
+ switch (hw->phy.media_type) {
+ case e1000_media_type_copper:
+ if (!hw->mac.get_link_status)
+ return true;
+ case e1000_media_type_internal_serdes:
+ e1000_check_for_link(hw);
+ link_active = !hw->mac.get_link_status;
+ break;
+ case e1000_media_type_unknown:
+ default:
+ break;
+ }
+
+ if (((hw->mac.type == e1000_i210) ||
+ (hw->mac.type == e1000_i211)) &&
+ (hw->phy.id == I210_I_PHY_ID)) {
+ if (!netif_carrier_ok(adapter->netdev)) {
+ adapter->flags &= ~IGB_FLAG_NEED_LINK_UPDATE;
+ } else if (!(adapter->flags & IGB_FLAG_NEED_LINK_UPDATE)) {
+ adapter->flags |= IGB_FLAG_NEED_LINK_UPDATE;
+ adapter->link_check_timeout = jiffies;
+ }
+ }
+
+ return link_active;
+}
+
+/**
+ * igb_watchdog - Timer Call-back
+ * @data: pointer to adapter cast into an unsigned long
+ **/
+static void igb_watchdog(unsigned long data)
+{
+ struct igb_adapter *adapter = (struct igb_adapter *)data;
+ /* Do the rest outside of interrupt context */
+ schedule_work(&adapter->watchdog_task);
+}
+
+static void igb_watchdog_task(struct work_struct *work)
+{
+ struct igb_adapter *adapter = container_of(work,
+ struct igb_adapter,
+ watchdog_task);
+ struct e1000_hw *hw = &adapter->hw;
+ struct net_device *netdev = adapter->netdev;
+ u32 link;
+ int i;
+ u32 thstat, ctrl_ext;
+ u32 connsw;
+
+ link = igb_has_link(adapter);
+ /* Force link down if we have fiber to swap to */
+ if (adapter->flags & IGB_FLAG_MAS_ENABLE) {
+ if (hw->phy.media_type == e1000_media_type_copper) {
+ connsw = E1000_READ_REG(hw, E1000_CONNSW);
+ if (!(connsw & E1000_CONNSW_AUTOSENSE_EN))
+ link = 0;
+ }
+ }
+
+ if (adapter->flags & IGB_FLAG_NEED_LINK_UPDATE) {
+ if (time_after(jiffies, (adapter->link_check_timeout + HZ)))
+ adapter->flags &= ~IGB_FLAG_NEED_LINK_UPDATE;
+ else
+ link = FALSE;
+ }
+
+ if (link) {
+ /* Perform a reset if the media type changed. */
+ if (hw->dev_spec._82575.media_changed) {
+ hw->dev_spec._82575.media_changed = false;
+ adapter->flags |= IGB_FLAG_MEDIA_RESET;
+ igb_reset(adapter);
+ }
+
+ /* Cancel scheduled suspend requests. */
+ pm_runtime_resume(netdev->dev.parent);
+
+ if (!netif_carrier_ok(netdev)) {
+ u32 ctrl;
+ e1000_get_speed_and_duplex(hw,
+ &adapter->link_speed,
+ &adapter->link_duplex);
+
+ ctrl = E1000_READ_REG(hw, E1000_CTRL);
+ /* Links status message must follow this format */
+ printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
+ "Flow Control: %s\n",
+ netdev->name,
+ adapter->link_speed,
+ adapter->link_duplex == FULL_DUPLEX ?
+ "Full Duplex" : "Half Duplex",
+ ((ctrl & E1000_CTRL_TFCE) &&
+ (ctrl & E1000_CTRL_RFCE)) ? "RX/TX":
+ ((ctrl & E1000_CTRL_RFCE) ? "RX" :
+ ((ctrl & E1000_CTRL_TFCE) ? "TX" : "None")));
+ /* adjust timeout factor according to speed/duplex */
+ adapter->tx_timeout_factor = 1;
+ switch (adapter->link_speed) {
+ case SPEED_10:
+ adapter->tx_timeout_factor = 14;
+ break;
+ case SPEED_100:
+ /* maybe add some timeout factor ? */
+ break;
+ default:
+ break;
+ }
+
+ netif_carrier_on(netdev);
+ netif_tx_wake_all_queues(netdev);
+
+ igb_ping_all_vfs(adapter);
+#ifdef IFLA_VF_MAX
+ igb_check_vf_rate_limit(adapter);
+#endif /* IFLA_VF_MAX */
+
+ /* link state has changed, schedule phy info update */
+ if (!test_bit(__IGB_DOWN, &adapter->state))
+ mod_timer(&adapter->phy_info_timer,
+ round_jiffies(jiffies + 2 * HZ));
+ }
+ } else {
+ if (netif_carrier_ok(netdev)) {
+ adapter->link_speed = 0;
+ adapter->link_duplex = 0;
+ /* check for thermal sensor event on i350 */
+ if (hw->mac.type == e1000_i350) {
+ thstat = E1000_READ_REG(hw, E1000_THSTAT);
+ ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
+ if ((hw->phy.media_type ==
+ e1000_media_type_copper) &&
+ !(ctrl_ext &
+ E1000_CTRL_EXT_LINK_MODE_SGMII)) {
+ if (thstat & E1000_THSTAT_PWR_DOWN) {
+ printk(KERN_ERR "igb: %s The "
+ "network adapter was stopped "
+ "because it overheated.\n",
+ netdev->name);
+ }
+ if (thstat & E1000_THSTAT_LINK_THROTTLE) {
+ printk(KERN_INFO
+ "igb: %s The network "
+ "adapter supported "
+ "link speed "
+ "was downshifted "
+ "because it "
+ "overheated.\n",
+ netdev->name);
+ }
+ }
+ }
+
+ /* Links status message must follow this format */
+ printk(KERN_INFO "igb: %s NIC Link is Down\n",
+ netdev->name);
+ netif_carrier_off(netdev);
+ netif_tx_stop_all_queues(netdev);
+
+ igb_ping_all_vfs(adapter);
+
+ /* link state has changed, schedule phy info update */
+ if (!test_bit(__IGB_DOWN, &adapter->state))
+ mod_timer(&adapter->phy_info_timer,
+ round_jiffies(jiffies + 2 * HZ));
+ /* link is down, time to check for alternate media */
+ if (adapter->flags & IGB_FLAG_MAS_ENABLE) {
+ igb_check_swap_media(adapter);
+ if (adapter->flags & IGB_FLAG_MEDIA_RESET) {
+ schedule_work(&adapter->reset_task);
+ /* return immediately */
+ return;
+ }
+ }
+ pm_schedule_suspend(netdev->dev.parent,
+ MSEC_PER_SEC * 5);
+
+ /* also check for alternate media here */
+ } else if (!netif_carrier_ok(netdev) &&
+ (adapter->flags & IGB_FLAG_MAS_ENABLE)) {
+ hw->mac.ops.power_up_serdes(hw);
+ igb_check_swap_media(adapter);
+ if (adapter->flags & IGB_FLAG_MEDIA_RESET) {
+ schedule_work(&adapter->reset_task);
+ /* return immediately */
+ return;
+ }
+ }
+ }
+
+ igb_update_stats(adapter);
+
+ for (i = 0; i < adapter->num_tx_queues; i++) {
+ struct igb_ring *tx_ring = adapter->tx_ring[i];
+ if (!netif_carrier_ok(netdev)) {
+ /* We've lost link, so the controller stops DMA,
+ * but we've got queued Tx work that's never going
+ * to get done, so reset controller to flush Tx.
+ * (Do the reset outside of interrupt context). */
+ if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
+ adapter->tx_timeout_count++;
+ schedule_work(&adapter->reset_task);
+ /* return immediately since reset is imminent */
+ return;
+ }
+ }
+
+ /* Force detection of hung controller every watchdog period */
+ set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
+ }
+
+ /* Cause software interrupt to ensure rx ring is cleaned */
+ if (adapter->msix_entries) {
+ u32 eics = 0;
+ for (i = 0; i < adapter->num_q_vectors; i++)
+ eics |= adapter->q_vector[i]->eims_value;
+ E1000_WRITE_REG(hw, E1000_EICS, eics);
+ } else {
+ E1000_WRITE_REG(hw, E1000_ICS, E1000_ICS_RXDMT0);
+ }
+
+ igb_spoof_check(adapter);
+
+ /* Reset the timer */
+ if (!test_bit(__IGB_DOWN, &adapter->state)) {
+ if (adapter->flags & IGB_FLAG_NEED_LINK_UPDATE)
+ mod_timer(&adapter->watchdog_timer,
+ round_jiffies(jiffies + HZ));
+ else
+ mod_timer(&adapter->watchdog_timer,
+ round_jiffies(jiffies + 2 * HZ));
+ }
+}
+
+static void igb_dma_err_task(struct work_struct *work)
+{
+ struct igb_adapter *adapter = container_of(work,
+ struct igb_adapter,
+ dma_err_task);
+ int vf;
+ struct e1000_hw *hw = &adapter->hw;
+ struct net_device *netdev = adapter->netdev;
+ u32 hgptc;
+ u32 ciaa, ciad;
+
+ hgptc = E1000_READ_REG(hw, E1000_HGPTC);
+ if (hgptc) /* If incrementing then no need for the check below */
+ goto dma_timer_reset;
+ /*
+ * Check to see if a bad DMA write target from an errant or
+ * malicious VF has caused a PCIe error. If so then we can
+ * issue a VFLR to the offending VF(s) and then resume without
+ * requesting a full slot reset.
+ */
+
+ for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
+ ciaa = (vf << 16) | 0x80000000;
+ /* 32 bit read so align, we really want status at offset 6 */
+ ciaa |= PCI_COMMAND;
+ E1000_WRITE_REG(hw, E1000_CIAA, ciaa);
+ ciad = E1000_READ_REG(hw, E1000_CIAD);
+ ciaa &= 0x7FFFFFFF;
+ /* disable debug mode asap after reading data */
+ E1000_WRITE_REG(hw, E1000_CIAA, ciaa);
+ /* Get the upper 16 bits which will be the PCI status reg */
+ ciad >>= 16;
+ if (ciad & (PCI_STATUS_REC_MASTER_ABORT |
+ PCI_STATUS_REC_TARGET_ABORT |
+ PCI_STATUS_SIG_SYSTEM_ERROR)) {
+ netdev_err(netdev, "VF %d suffered error\n", vf);
+ /* Issue VFLR */
+ ciaa = (vf << 16) | 0x80000000;
+ ciaa |= 0xA8;
+ E1000_WRITE_REG(hw, E1000_CIAA, ciaa);
+ ciad = 0x00008000; /* VFLR */
+ E1000_WRITE_REG(hw, E1000_CIAD, ciad);
+ ciaa &= 0x7FFFFFFF;
+ E1000_WRITE_REG(hw, E1000_CIAA, ciaa);
+ }
+ }
+dma_timer_reset:
+ /* Reset the timer */
+ if (!test_bit(__IGB_DOWN, &adapter->state))
+ mod_timer(&adapter->dma_err_timer,
+ round_jiffies(jiffies + HZ / 10));
+}
+
+/**
+ * igb_dma_err_timer - Timer Call-back
+ * @data: pointer to adapter cast into an unsigned long
+ **/
+static void igb_dma_err_timer(unsigned long data)
+{
+ struct igb_adapter *adapter = (struct igb_adapter *)data;
+ /* Do the rest outside of interrupt context */
+ schedule_work(&adapter->dma_err_task);
+}
+
+enum latency_range {
+ lowest_latency = 0,
+ low_latency = 1,
+ bulk_latency = 2,
+ latency_invalid = 255
+};
+
+/**
+ * igb_update_ring_itr - update the dynamic ITR value based on packet size
+ *
+ * Stores a new ITR value based on strictly on packet size. This
+ * algorithm is less sophisticated than that used in igb_update_itr,
+ * due to the difficulty of synchronizing statistics across multiple
+ * receive rings. The divisors and thresholds used by this function
+ * were determined based on theoretical maximum wire speed and testing
+ * data, in order to minimize response time while increasing bulk
+ * throughput.
+ * This functionality is controlled by the InterruptThrottleRate module
+ * parameter (see igb_param.c)
+ * NOTE: This function is called only when operating in a multiqueue
+ * receive environment.
+ * @q_vector: pointer to q_vector
+ **/
+static void igb_update_ring_itr(struct igb_q_vector *q_vector)
+{
+ int new_val = q_vector->itr_val;
+ int avg_wire_size = 0;
+ struct igb_adapter *adapter = q_vector->adapter;
+ unsigned int packets;
+
+ /* For non-gigabit speeds, just fix the interrupt rate at 4000
+ * ints/sec - ITR timer value of 120 ticks.
+ */
+ switch (adapter->link_speed) {
+ case SPEED_10:
+ case SPEED_100:
+ new_val = IGB_4K_ITR;
+ goto set_itr_val;
+ default:
+ break;
+ }
+
+ packets = q_vector->rx.total_packets;
+ if (packets)
+ avg_wire_size = q_vector->rx.total_bytes / packets;
+
+ packets = q_vector->tx.total_packets;
+ if (packets)
+ avg_wire_size = max_t(u32, avg_wire_size,
+ q_vector->tx.total_bytes / packets);
+
+ /* if avg_wire_size isn't set no work was done */
+ if (!avg_wire_size)
+ goto clear_counts;
+
+ /* Add 24 bytes to size to account for CRC, preamble, and gap */
+ avg_wire_size += 24;
+
+ /* Don't starve jumbo frames */
+ avg_wire_size = min(avg_wire_size, 3000);
+
+ /* Give a little boost to mid-size frames */
+ if ((avg_wire_size > 300) && (avg_wire_size < 1200))
+ new_val = avg_wire_size / 3;
+ else
+ new_val = avg_wire_size / 2;
+
+ /* conservative mode (itr 3) eliminates the lowest_latency setting */
+ if (new_val < IGB_20K_ITR &&
+ ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
+ (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
+ new_val = IGB_20K_ITR;
+
+set_itr_val:
+ if (new_val != q_vector->itr_val) {
+ q_vector->itr_val = new_val;
+ q_vector->set_itr = 1;
+ }
+clear_counts:
+ q_vector->rx.total_bytes = 0;
+ q_vector->rx.total_packets = 0;
+ q_vector->tx.total_bytes = 0;
+ q_vector->tx.total_packets = 0;
+}
+
+/**
+ * igb_update_itr - update the dynamic ITR value based on statistics
+ * Stores a new ITR value based on packets and byte
+ * counts during the last interrupt. The advantage of per interrupt
+ * computation is faster updates and more accurate ITR for the current
+ * traffic pattern. Constants in this function were computed
+ * based on theoretical maximum wire speed and thresholds were set based
+ * on testing data as well as attempting to minimize response time
+ * while increasing bulk throughput.
+ * this functionality is controlled by the InterruptThrottleRate module
+ * parameter (see igb_param.c)
+ * NOTE: These calculations are only valid when operating in a single-
+ * queue environment.
+ * @q_vector: pointer to q_vector
+ * @ring_container: ring info to update the itr for
+ **/
+static void igb_update_itr(struct igb_q_vector *q_vector,
+ struct igb_ring_container *ring_container)
+{
+ unsigned int packets = ring_container->total_packets;
+ unsigned int bytes = ring_container->total_bytes;
+ u8 itrval = ring_container->itr;
+
+ /* no packets, exit with status unchanged */
+ if (packets == 0)
+ return;
+
+ switch (itrval) {
+ case lowest_latency:
+ /* handle TSO and jumbo frames */
+ if (bytes/packets > 8000)
+ itrval = bulk_latency;
+ else if ((packets < 5) && (bytes > 512))
+ itrval = low_latency;
+ break;
+ case low_latency: /* 50 usec aka 20000 ints/s */
+ if (bytes > 10000) {
+ /* this if handles the TSO accounting */
+ if (bytes/packets > 8000) {
+ itrval = bulk_latency;
+ } else if ((packets < 10) || ((bytes/packets) > 1200)) {
+ itrval = bulk_latency;
+ } else if ((packets > 35)) {
+ itrval = lowest_latency;
+ }
+ } else if (bytes/packets > 2000) {
+ itrval = bulk_latency;
+ } else if (packets <= 2 && bytes < 512) {
+ itrval = lowest_latency;
+ }
+ break;
+ case bulk_latency: /* 250 usec aka 4000 ints/s */
+ if (bytes > 25000) {
+ if (packets > 35)
+ itrval = low_latency;
+ } else if (bytes < 1500) {
+ itrval = low_latency;
+ }
+ break;
+ }
+
+ /* clear work counters since we have the values we need */
+ ring_container->total_bytes = 0;
+ ring_container->total_packets = 0;
+
+ /* write updated itr to ring container */
+ ring_container->itr = itrval;
+}
+
+static void igb_set_itr(struct igb_q_vector *q_vector)
+{
+ struct igb_adapter *adapter = q_vector->adapter;
+ u32 new_itr = q_vector->itr_val;
+ u8 current_itr = 0;
+
+ /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
+ switch (adapter->link_speed) {
+ case SPEED_10:
+ case SPEED_100:
+ current_itr = 0;
+ new_itr = IGB_4K_ITR;
+ goto set_itr_now;
+ default:
+ break;
+ }
+
+ igb_update_itr(q_vector, &q_vector->tx);
+ igb_update_itr(q_vector, &q_vector->rx);
+
+ current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
+
+ /* conservative mode (itr 3) eliminates the lowest_latency setting */
+ if (current_itr == lowest_latency &&
+ ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
+ (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
+ current_itr = low_latency;
+
+ switch (current_itr) {
+ /* counts and packets in update_itr are dependent on these numbers */
+ case lowest_latency:
+ new_itr = IGB_70K_ITR; /* 70,000 ints/sec */
+ break;
+ case low_latency:
+ new_itr = IGB_20K_ITR; /* 20,000 ints/sec */
+ break;
+ case bulk_latency:
+ new_itr = IGB_4K_ITR; /* 4,000 ints/sec */
+ break;
+ default:
+ break;
+ }
+
+set_itr_now:
+ if (new_itr != q_vector->itr_val) {
+ /* this attempts to bias the interrupt rate towards Bulk
+ * by adding intermediate steps when interrupt rate is
+ * increasing */
+ new_itr = new_itr > q_vector->itr_val ?
+ max((new_itr * q_vector->itr_val) /
+ (new_itr + (q_vector->itr_val >> 2)),
+ new_itr) :
+ new_itr;
+ /* Don't write the value here; it resets the adapter's
+ * internal timer, and causes us to delay far longer than
+ * we should between interrupts. Instead, we write the ITR
+ * value at the beginning of the next interrupt so the timing
+ * ends up being correct.
+ */
+ q_vector->itr_val = new_itr;
+ q_vector->set_itr = 1;
+ }
+}
+
+void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
+ u32 type_tucmd, u32 mss_l4len_idx)
+{
+ struct e1000_adv_tx_context_desc *context_desc;
+ u16 i = tx_ring->next_to_use;
+
+ context_desc = IGB_TX_CTXTDESC(tx_ring, i);
+
+ i++;
+ tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
+
+ /* set bits to identify this as an advanced context descriptor */
+ type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
+
+ /* For 82575, context index must be unique per ring. */
+ if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
+ mss_l4len_idx |= tx_ring->reg_idx << 4;
+
+ context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens);
+ context_desc->seqnum_seed = 0;
+ context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd);
+ context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
+}
+
+static int igb_tso(struct igb_ring *tx_ring,
+ struct igb_tx_buffer *first,
+ u8 *hdr_len)
+{
+#ifdef NETIF_F_TSO
+ struct sk_buff *skb = first->skb;
+ u32 vlan_macip_lens, type_tucmd;
+ u32 mss_l4len_idx, l4len;
+
+ if (skb->ip_summed != CHECKSUM_PARTIAL)
+ return 0;
+
+ if (!skb_is_gso(skb))
+#endif /* NETIF_F_TSO */
+ return 0;
+#ifdef NETIF_F_TSO
+
+ if (skb_header_cloned(skb)) {
+ int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
+ if (err)
+ return err;
+ }
+
+ /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
+ type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
+
+ if (first->protocol == __constant_htons(ETH_P_IP)) {
+ struct iphdr *iph = ip_hdr(skb);
+ iph->tot_len = 0;
+ iph->check = 0;
+ tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
+ iph->daddr, 0,
+ IPPROTO_TCP,
+ 0);
+ type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
+ first->tx_flags |= IGB_TX_FLAGS_TSO |
+ IGB_TX_FLAGS_CSUM |
+ IGB_TX_FLAGS_IPV4;
+#ifdef NETIF_F_TSO6
+ } else if (skb_is_gso_v6(skb)) {
+ ipv6_hdr(skb)->payload_len = 0;
+ tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+ &ipv6_hdr(skb)->daddr,
+ 0, IPPROTO_TCP, 0);
+ first->tx_flags |= IGB_TX_FLAGS_TSO |
+ IGB_TX_FLAGS_CSUM;
+#endif
+ }
+
+ /* compute header lengths */
+ l4len = tcp_hdrlen(skb);
+ *hdr_len = skb_transport_offset(skb) + l4len;
+
+ /* update gso size and bytecount with header size */
+ first->gso_segs = skb_shinfo(skb)->gso_segs;
+ first->bytecount += (first->gso_segs - 1) * *hdr_len;
+
+ /* MSS L4LEN IDX */
+ mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT;
+ mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;
+
+ /* VLAN MACLEN IPLEN */
+ vlan_macip_lens = skb_network_header_len(skb);
+ vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
+ vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
+
+ igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
+
+ return 1;
+#endif /* NETIF_F_TSO */
+}
+
+static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
+{
+ struct sk_buff *skb = first->skb;
+ u32 vlan_macip_lens = 0;
+ u32 mss_l4len_idx = 0;
+ u32 type_tucmd = 0;
+
+ if (skb->ip_summed != CHECKSUM_PARTIAL) {
+ if (!(first->tx_flags & IGB_TX_FLAGS_VLAN))
+ return;
+ } else {
+ u8 nexthdr = 0;
+ switch (first->protocol) {
+ case __constant_htons(ETH_P_IP):
+ vlan_macip_lens |= skb_network_header_len(skb);
+ type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
+ nexthdr = ip_hdr(skb)->protocol;
+ break;
+#ifdef NETIF_F_IPV6_CSUM
+ case __constant_htons(ETH_P_IPV6):
+ vlan_macip_lens |= skb_network_header_len(skb);
+ nexthdr = ipv6_hdr(skb)->nexthdr;
+ break;
+#endif
+ default:
+ if (unlikely(net_ratelimit())) {
+ dev_warn(tx_ring->dev,
+ "partial checksum but proto=%x!\n",
+ first->protocol);
+ }
+ break;
+ }
+
+ switch (nexthdr) {
+ case IPPROTO_TCP:
+ type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
+ mss_l4len_idx = tcp_hdrlen(skb) <<
+ E1000_ADVTXD_L4LEN_SHIFT;
+ break;
+#ifdef HAVE_SCTP
+ case IPPROTO_SCTP:
+ type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
+ mss_l4len_idx = sizeof(struct sctphdr) <<
+ E1000_ADVTXD_L4LEN_SHIFT;
+ break;
+#endif
+ case IPPROTO_UDP:
+ mss_l4len_idx = sizeof(struct udphdr) <<
+ E1000_ADVTXD_L4LEN_SHIFT;
+ break;
+ default:
+ if (unlikely(net_ratelimit())) {
+ dev_warn(tx_ring->dev,
+ "partial checksum but l4 proto=%x!\n",
+ nexthdr);
+ }
+ break;
+ }
+
+ /* update TX checksum flag */
+ first->tx_flags |= IGB_TX_FLAGS_CSUM;
+ }
+
+ vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
+ vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
+
+ igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
+}
+
+#define IGB_SET_FLAG(_input, _flag, _result) \
+ ((_flag <= _result) ? \
+ ((u32)(_input & _flag) * (_result / _flag)) : \
+ ((u32)(_input & _flag) / (_flag / _result)))
+
+static u32 igb_tx_cmd_type(struct sk_buff *skb, u32 tx_flags)
+{
+ /* set type for advanced descriptor with frame checksum insertion */
+ u32 cmd_type = E1000_ADVTXD_DTYP_DATA |
+ E1000_ADVTXD_DCMD_DEXT |
+ E1000_ADVTXD_DCMD_IFCS;
+
+ /* set HW vlan bit if vlan is present */
+ cmd_type |= IGB_SET_FLAG(tx_flags, IGB_TX_FLAGS_VLAN,
+ (E1000_ADVTXD_DCMD_VLE));
+
+ /* set segmentation bits for TSO */
+ cmd_type |= IGB_SET_FLAG(tx_flags, IGB_TX_FLAGS_TSO,
+ (E1000_ADVTXD_DCMD_TSE));
+
+ /* set timestamp bit if present */
+ cmd_type |= IGB_SET_FLAG(tx_flags, IGB_TX_FLAGS_TSTAMP,
+ (E1000_ADVTXD_MAC_TSTAMP));
+
+ return cmd_type;
+}
+
+static void igb_tx_olinfo_status(struct igb_ring *tx_ring,
+ union e1000_adv_tx_desc *tx_desc,
+ u32 tx_flags, unsigned int paylen)
+{
+ u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;
+
+ /* 82575 requires a unique index per ring */
+ if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
+ olinfo_status |= tx_ring->reg_idx << 4;
+
+ /* insert L4 checksum */
+ olinfo_status |= IGB_SET_FLAG(tx_flags,
+ IGB_TX_FLAGS_CSUM,
+ (E1000_TXD_POPTS_TXSM << 8));
+
+ /* insert IPv4 checksum */
+ olinfo_status |= IGB_SET_FLAG(tx_flags,
+ IGB_TX_FLAGS_IPV4,
+ (E1000_TXD_POPTS_IXSM << 8));
+
+ tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
+}
+
+static void igb_tx_map(struct igb_ring *tx_ring,
+ struct igb_tx_buffer *first,
+ const u8 hdr_len)
+{
+ struct sk_buff *skb = first->skb;
+ struct igb_tx_buffer *tx_buffer;
+ union e1000_adv_tx_desc *tx_desc;
+ struct skb_frag_struct *frag;
+ dma_addr_t dma;
+ unsigned int data_len, size;
+ u32 tx_flags = first->tx_flags;
+ u32 cmd_type = igb_tx_cmd_type(skb, tx_flags);
+ u16 i = tx_ring->next_to_use;
+
+ tx_desc = IGB_TX_DESC(tx_ring, i);
+
+ igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, skb->len - hdr_len);
+
+ size = skb_headlen(skb);
+ data_len = skb->data_len;
+
+ dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
+
+ tx_buffer = first;
+
+ for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
+ if (dma_mapping_error(tx_ring->dev, dma))
+ goto dma_error;
+
+ /* record length, and DMA address */
+ dma_unmap_len_set(tx_buffer, len, size);
+ dma_unmap_addr_set(tx_buffer, dma, dma);
+
+ tx_desc->read.buffer_addr = cpu_to_le64(dma);
+
+ while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
+ tx_desc->read.cmd_type_len =
+ cpu_to_le32(cmd_type ^ IGB_MAX_DATA_PER_TXD);
+
+ i++;
+ tx_desc++;
+ if (i == tx_ring->count) {
+ tx_desc = IGB_TX_DESC(tx_ring, 0);
+ i = 0;
+ }
+ tx_desc->read.olinfo_status = 0;
+
+ dma += IGB_MAX_DATA_PER_TXD;
+ size -= IGB_MAX_DATA_PER_TXD;
+
+ tx_desc->read.buffer_addr = cpu_to_le64(dma);
+ }
+
+ if (likely(!data_len))
+ break;
+
+ tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type ^ size);
+
+ i++;
+ tx_desc++;
+ if (i == tx_ring->count) {
+ tx_desc = IGB_TX_DESC(tx_ring, 0);
+ i = 0;
+ }
+ tx_desc->read.olinfo_status = 0;
+
+ size = skb_frag_size(frag);
+ data_len -= size;
+
+ dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
+ size, DMA_TO_DEVICE);
+
+ tx_buffer = &tx_ring->tx_buffer_info[i];
+ }
+
+ /* write last descriptor with RS and EOP bits */
+ cmd_type |= size | IGB_TXD_DCMD;
+ tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
+
+ netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
+ /* set the timestamp */
+ first->time_stamp = jiffies;
+
+ /*
+ * Force memory writes to complete before letting h/w know there
+ * are new descriptors to fetch. (Only applicable for weak-ordered
+ * memory model archs, such as IA-64).
+ *
+ * We also need this memory barrier to make certain all of the
+ * status bits have been updated before next_to_watch is written.
+ */
+ wmb();
+
+ /* set next_to_watch value indicating a packet is present */
+ first->next_to_watch = tx_desc;
+
+ i++;
+ if (i == tx_ring->count)
+ i = 0;
+
+ tx_ring->next_to_use = i;
+
+ writel(i, tx_ring->tail);
+
+ /* we need this if more than one processor can write to our tail
+ * at a time, it syncronizes IO on IA64/Altix systems */
+ mmiowb();
+
+ return;
+
+dma_error:
+ dev_err(tx_ring->dev, "TX DMA map failed\n");
+
+ /* clear dma mappings for failed tx_buffer_info map */
+ for (;;) {
+ tx_buffer = &tx_ring->tx_buffer_info[i];
+ igb_unmap_and_free_tx_resource(tx_ring, tx_buffer);
+ if (tx_buffer == first)
+ break;
+ if (i == 0)
+ i = tx_ring->count;
+ i--;
+ }
+
+ tx_ring->next_to_use = i;
+}
+
+static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
+{
+ struct net_device *netdev = netdev_ring(tx_ring);
+
+ if (netif_is_multiqueue(netdev))
+ netif_stop_subqueue(netdev, ring_queue_index(tx_ring));
+ else
+ netif_stop_queue(netdev);
+
+ /* Herbert's original patch had:
+ * smp_mb__after_netif_stop_queue();
+ * but since that doesn't exist yet, just open code it. */
+ smp_mb();
+
+ /* We need to check again in a case another CPU has just
+ * made room available. */
+ if (igb_desc_unused(tx_ring) < size)
+ return -EBUSY;
+
+ /* A reprieve! */
+ if (netif_is_multiqueue(netdev))
+ netif_wake_subqueue(netdev, ring_queue_index(tx_ring));
+ else
+ netif_wake_queue(netdev);
+
+ tx_ring->tx_stats.restart_queue++;
+
+ return 0;
+}
+
+static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
+{
+ if (igb_desc_unused(tx_ring) >= size)
+ return 0;
+ return __igb_maybe_stop_tx(tx_ring, size);
+}
+
+netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
+ struct igb_ring *tx_ring)
+{
+ struct igb_tx_buffer *first;
+ int tso;
+ u32 tx_flags = 0;
+#if PAGE_SIZE > IGB_MAX_DATA_PER_TXD
+ unsigned short f;
+#endif
+ u16 count = TXD_USE_COUNT(skb_headlen(skb));
+ __be16 protocol = vlan_get_protocol(skb);
+ u8 hdr_len = 0;
+
+ /*
+ * need: 1 descriptor per page * PAGE_SIZE/IGB_MAX_DATA_PER_TXD,
+ * + 1 desc for skb_headlen/IGB_MAX_DATA_PER_TXD,
+ * + 2 desc gap to keep tail from touching head,
+ * + 1 desc for context descriptor,
+ * otherwise try next time
+ */
+#if PAGE_SIZE > IGB_MAX_DATA_PER_TXD
+ for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
+ count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);
+#else
+ count += skb_shinfo(skb)->nr_frags;
+#endif
+ if (igb_maybe_stop_tx(tx_ring, count + 3)) {
+ /* this is a hard error */
+ return NETDEV_TX_BUSY;
+ }
+
+ /* record the location of the first descriptor for this packet */
+ first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
+ first->skb = skb;
+ first->bytecount = skb->len;
+ first->gso_segs = 1;
+
+ skb_tx_timestamp(skb);
+
+#ifdef HAVE_PTP_1588_CLOCK
+ if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
+ struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
+ if (!adapter->ptp_tx_skb) {
+ skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+ tx_flags |= IGB_TX_FLAGS_TSTAMP;
+
+ adapter->ptp_tx_skb = skb_get(skb);
+ adapter->ptp_tx_start = jiffies;
+ if (adapter->hw.mac.type == e1000_82576)
+ schedule_work(&adapter->ptp_tx_work);
+ }
+ }
+#endif /* HAVE_PTP_1588_CLOCK */
+
+ if (vlan_tx_tag_present(skb)) {
+ tx_flags |= IGB_TX_FLAGS_VLAN;
+ tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
+ }
+
+ /* record initial flags and protocol */
+ first->tx_flags = tx_flags;
+ first->protocol = protocol;
+
+ tso = igb_tso(tx_ring, first, &hdr_len);
+ if (tso < 0)
+ goto out_drop;
+ else if (!tso)
+ igb_tx_csum(tx_ring, first);
+
+ igb_tx_map(tx_ring, first, hdr_len);
+
+#ifndef HAVE_TRANS_START_IN_QUEUE
+ netdev_ring(tx_ring)->trans_start = jiffies;
+
+#endif
+ /* Make sure there is space in the ring for the next send. */
+ igb_maybe_stop_tx(tx_ring, DESC_NEEDED);
+
+ return NETDEV_TX_OK;
+
+out_drop:
+ igb_unmap_and_free_tx_resource(tx_ring, first);
+
+ return NETDEV_TX_OK;
+}
+
+#ifdef HAVE_TX_MQ
+static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
+ struct sk_buff *skb)
+{
+ unsigned int r_idx = skb->queue_mapping;
+
+ if (r_idx >= adapter->num_tx_queues)
+ r_idx = r_idx % adapter->num_tx_queues;
+
+ return adapter->tx_ring[r_idx];
+}
+#else
+#define igb_tx_queue_mapping(_adapter, _skb) (_adapter)->tx_ring[0]
+#endif
+
+static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
+ struct net_device *netdev)
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+
+ if (test_bit(__IGB_DOWN, &adapter->state)) {
+ dev_kfree_skb_any(skb);
+ return NETDEV_TX_OK;
+ }
+
+ if (skb->len <= 0) {
+ dev_kfree_skb_any(skb);
+ return NETDEV_TX_OK;
+ }
+
+ /*
+ * The minimum packet size with TCTL.PSP set is 17 so pad the skb
+ * in order to meet this minimum size requirement.
+ */
+ if (skb->len < 17) {
+ if (skb_padto(skb, 17))
+ return NETDEV_TX_OK;
+ skb->len = 17;
+ }
+
+ return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
+}
+
+/**
+ * igb_tx_timeout - Respond to a Tx Hang
+ * @netdev: network interface device structure
+ **/
+static void igb_tx_timeout(struct net_device *netdev)
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+ struct e1000_hw *hw = &adapter->hw;
+
+ /* Do the reset outside of interrupt context */
+ adapter->tx_timeout_count++;
+
+ if (hw->mac.type >= e1000_82580)
+ hw->dev_spec._82575.global_device_reset = true;
+
+ schedule_work(&adapter->reset_task);
+ E1000_WRITE_REG(hw, E1000_EICS,
+ (adapter->eims_enable_mask & ~adapter->eims_other));
+}
+
+static void igb_reset_task(struct work_struct *work)
+{
+ struct igb_adapter *adapter;
+ adapter = container_of(work, struct igb_adapter, reset_task);
+
+ igb_reinit_locked(adapter);
+}
+
+/**
+ * igb_get_stats - Get System Network Statistics
+ * @netdev: network interface device structure
+ *
+ * Returns the address of the device statistics structure.
+ * The statistics are updated here and also from the timer callback.
+ **/
+static struct net_device_stats *igb_get_stats(struct net_device *netdev)
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+
+ if (!test_bit(__IGB_RESETTING, &adapter->state))
+ igb_update_stats(adapter);
+
+#ifdef HAVE_NETDEV_STATS_IN_NETDEV
+ /* only return the current stats */
+ return &netdev->stats;
+#else
+ /* only return the current stats */
+ return &adapter->net_stats;
+#endif /* HAVE_NETDEV_STATS_IN_NETDEV */
+}
+
+/**
+ * igb_change_mtu - Change the Maximum Transfer Unit
+ * @netdev: network interface device structure
+ * @new_mtu: new value for maximum frame size
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int igb_change_mtu(struct net_device *netdev, int new_mtu)
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+ struct e1000_hw *hw = &adapter->hw;
+ struct pci_dev *pdev = adapter->pdev;
+ int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
+
+ if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
+ dev_err(pci_dev_to_dev(pdev), "Invalid MTU setting\n");
+ return -EINVAL;
+ }
+
+#define MAX_STD_JUMBO_FRAME_SIZE 9238
+ if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
+ dev_err(pci_dev_to_dev(pdev), "MTU > 9216 not supported.\n");
+ return -EINVAL;
+ }
+
+ /* adjust max frame to be at least the size of a standard frame */
+ if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN))
+ max_frame = ETH_FRAME_LEN + ETH_FCS_LEN;
+
+ while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
+ usleep_range(1000, 2000);
+
+ /* igb_down has a dependency on max_frame_size */
+ adapter->max_frame_size = max_frame;
+
+ if (netif_running(netdev))
+ igb_down(adapter);
+
+ dev_info(pci_dev_to_dev(pdev), "changing MTU from %d to %d\n",
+ netdev->mtu, new_mtu);
+ netdev->mtu = new_mtu;
+ hw->dev_spec._82575.mtu = new_mtu;
+
+ if (netif_running(netdev))
+ igb_up(adapter);
+ else
+ igb_reset(adapter);
+
+ clear_bit(__IGB_RESETTING, &adapter->state);
+
+ return 0;
+}
+
+/**
+ * igb_update_stats - Update the board statistics counters
+ * @adapter: board private structure
+ **/
+
+void igb_update_stats(struct igb_adapter *adapter)
+{
+#ifdef HAVE_NETDEV_STATS_IN_NETDEV
+ struct net_device_stats *net_stats = &adapter->netdev->stats;
+#else
+ struct net_device_stats *net_stats = &adapter->net_stats;
+#endif /* HAVE_NETDEV_STATS_IN_NETDEV */
+ struct e1000_hw *hw = &adapter->hw;
+#ifdef HAVE_PCI_ERS
+ struct pci_dev *pdev = adapter->pdev;
+#endif
+ u32 reg, mpc;
+ u16 phy_tmp;
+ int i;
+ u64 bytes, packets;
+#ifndef IGB_NO_LRO
+ u32 flushed = 0, coal = 0;
+ struct igb_q_vector *q_vector;
+#endif
+
+#define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
+
+ /*
+ * Prevent stats update while adapter is being reset, or if the pci
+ * connection is down.
+ */
+ if (adapter->link_speed == 0)
+ return;
+#ifdef HAVE_PCI_ERS
+ if (pci_channel_offline(pdev))
+ return;
+
+#endif
+#ifndef IGB_NO_LRO
+ for (i = 0; i < adapter->num_q_vectors; i++) {
+ q_vector = adapter->q_vector[i];
+ if (!q_vector)
+ continue;
+ flushed += q_vector->lrolist.stats.flushed;
+ coal += q_vector->lrolist.stats.coal;
+ }
+ adapter->lro_stats.flushed = flushed;
+ adapter->lro_stats.coal = coal;
+
+#endif
+ bytes = 0;
+ packets = 0;
+ for (i = 0; i < adapter->num_rx_queues; i++) {
+ u32 rqdpc_tmp = E1000_READ_REG(hw, E1000_RQDPC(i)) & 0x0FFF;
+ struct igb_ring *ring = adapter->rx_ring[i];
+ ring->rx_stats.drops += rqdpc_tmp;
+ net_stats->rx_fifo_errors += rqdpc_tmp;
+#ifdef CONFIG_IGB_VMDQ_NETDEV
+ if (!ring->vmdq_netdev) {
+ bytes += ring->rx_stats.bytes;
+ packets += ring->rx_stats.packets;
+ }
+#else
+ bytes += ring->rx_stats.bytes;
+ packets += ring->rx_stats.packets;
+#endif
+ }
+
+ net_stats->rx_bytes = bytes;
+ net_stats->rx_packets = packets;
+
+ bytes = 0;
+ packets = 0;
+ for (i = 0; i < adapter->num_tx_queues; i++) {
+ struct igb_ring *ring = adapter->tx_ring[i];
+#ifdef CONFIG_IGB_VMDQ_NETDEV
+ if (!ring->vmdq_netdev) {
+ bytes += ring->tx_stats.bytes;
+ packets += ring->tx_stats.packets;
+ }
+#else
+ bytes += ring->tx_stats.bytes;
+ packets += ring->tx_stats.packets;
+#endif
+ }
+ net_stats->tx_bytes = bytes;
+ net_stats->tx_packets = packets;
+
+ /* read stats registers */
+ adapter->stats.crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
+ adapter->stats.gprc += E1000_READ_REG(hw, E1000_GPRC);
+ adapter->stats.gorc += E1000_READ_REG(hw, E1000_GORCL);
+ E1000_READ_REG(hw, E1000_GORCH); /* clear GORCL */
+ adapter->stats.bprc += E1000_READ_REG(hw, E1000_BPRC);
+ adapter->stats.mprc += E1000_READ_REG(hw, E1000_MPRC);
+ adapter->stats.roc += E1000_READ_REG(hw, E1000_ROC);
+
+ adapter->stats.prc64 += E1000_READ_REG(hw, E1000_PRC64);
+ adapter->stats.prc127 += E1000_READ_REG(hw, E1000_PRC127);
+ adapter->stats.prc255 += E1000_READ_REG(hw, E1000_PRC255);
+ adapter->stats.prc511 += E1000_READ_REG(hw, E1000_PRC511);
+ adapter->stats.prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
+ adapter->stats.prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
+ adapter->stats.symerrs += E1000_READ_REG(hw, E1000_SYMERRS);
+ adapter->stats.sec += E1000_READ_REG(hw, E1000_SEC);
+
+ mpc = E1000_READ_REG(hw, E1000_MPC);
+ adapter->stats.mpc += mpc;
+ net_stats->rx_fifo_errors += mpc;
+ adapter->stats.scc += E1000_READ_REG(hw, E1000_SCC);
+ adapter->stats.ecol += E1000_READ_REG(hw, E1000_ECOL);
+ adapter->stats.mcc += E1000_READ_REG(hw, E1000_MCC);
+ adapter->stats.latecol += E1000_READ_REG(hw, E1000_LATECOL);
+ adapter->stats.dc += E1000_READ_REG(hw, E1000_DC);
+ adapter->stats.rlec += E1000_READ_REG(hw, E1000_RLEC);
+ adapter->stats.xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
+ adapter->stats.xontxc += E1000_READ_REG(hw, E1000_XONTXC);
+ adapter->stats.xoffrxc += E1000_READ_REG(hw, E1000_XOFFRXC);
+ adapter->stats.xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
+ adapter->stats.fcruc += E1000_READ_REG(hw, E1000_FCRUC);
+ adapter->stats.gptc += E1000_READ_REG(hw, E1000_GPTC);
+ adapter->stats.gotc += E1000_READ_REG(hw, E1000_GOTCL);
+ E1000_READ_REG(hw, E1000_GOTCH); /* clear GOTCL */
+ adapter->stats.rnbc += E1000_READ_REG(hw, E1000_RNBC);
+ adapter->stats.ruc += E1000_READ_REG(hw, E1000_RUC);
+ adapter->stats.rfc += E1000_READ_REG(hw, E1000_RFC);
+ adapter->stats.rjc += E1000_READ_REG(hw, E1000_RJC);
+ adapter->stats.tor += E1000_READ_REG(hw, E1000_TORH);
+ adapter->stats.tot += E1000_READ_REG(hw, E1000_TOTH);
+ adapter->stats.tpr += E1000_READ_REG(hw, E1000_TPR);
+
+ adapter->stats.ptc64 += E1000_READ_REG(hw, E1000_PTC64);
+ adapter->stats.ptc127 += E1000_READ_REG(hw, E1000_PTC127);
+ adapter->stats.ptc255 += E1000_READ_REG(hw, E1000_PTC255);
+ adapter->stats.ptc511 += E1000_READ_REG(hw, E1000_PTC511);
+ adapter->stats.ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
+ adapter->stats.ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
+
+ adapter->stats.mptc += E1000_READ_REG(hw, E1000_MPTC);
+ adapter->stats.bptc += E1000_READ_REG(hw, E1000_BPTC);
+
+ adapter->stats.tpt += E1000_READ_REG(hw, E1000_TPT);
+ adapter->stats.colc += E1000_READ_REG(hw, E1000_COLC);
+
+ adapter->stats.algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
+ /* read internal phy sepecific stats */
+ reg = E1000_READ_REG(hw, E1000_CTRL_EXT);
+ if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
+ adapter->stats.rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
+
+ /* this stat has invalid values on i210/i211 */
+ if ((hw->mac.type != e1000_i210) &&
+ (hw->mac.type != e1000_i211))
+ adapter->stats.tncrs += E1000_READ_REG(hw, E1000_TNCRS);
+ }
+ adapter->stats.tsctc += E1000_READ_REG(hw, E1000_TSCTC);
+ adapter->stats.tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
+
+ adapter->stats.iac += E1000_READ_REG(hw, E1000_IAC);
+ adapter->stats.icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
+ adapter->stats.icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
+ adapter->stats.icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
+ adapter->stats.ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
+ adapter->stats.ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
+ adapter->stats.ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
+ adapter->stats.ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
+ adapter->stats.icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
+
+ /* Fill out the OS statistics structure */
+ net_stats->multicast = adapter->stats.mprc;
+ net_stats->collisions = adapter->stats.colc;
+
+ /* Rx Errors */
+
+ /* RLEC on some newer hardware can be incorrect so build
+ * our own version based on RUC and ROC */
+ net_stats->rx_errors = adapter->stats.rxerrc +
+ adapter->stats.crcerrs + adapter->stats.algnerrc +
+ adapter->stats.ruc + adapter->stats.roc +
+ adapter->stats.cexterr;
+ net_stats->rx_length_errors = adapter->stats.ruc +
+ adapter->stats.roc;
+ net_stats->rx_crc_errors = adapter->stats.crcerrs;
+ net_stats->rx_frame_errors = adapter->stats.algnerrc;
+ net_stats->rx_missed_errors = adapter->stats.mpc;
+
+ /* Tx Errors */
+ net_stats->tx_errors = adapter->stats.ecol +
+ adapter->stats.latecol;
+ net_stats->tx_aborted_errors = adapter->stats.ecol;
+ net_stats->tx_window_errors = adapter->stats.latecol;
+ net_stats->tx_carrier_errors = adapter->stats.tncrs;
+
+ /* Tx Dropped needs to be maintained elsewhere */
+
+ /* Phy Stats */
+ if (hw->phy.media_type == e1000_media_type_copper) {
+ if ((adapter->link_speed == SPEED_1000) &&
+ (!e1000_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
+ phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
+ adapter->phy_stats.idle_errors += phy_tmp;
+ }
+ }
+
+ /* Management Stats */
+ adapter->stats.mgptc += E1000_READ_REG(hw, E1000_MGTPTC);
+ adapter->stats.mgprc += E1000_READ_REG(hw, E1000_MGTPRC);
+ if (hw->mac.type > e1000_82580) {
+ adapter->stats.o2bgptc += E1000_READ_REG(hw, E1000_O2BGPTC);
+ adapter->stats.o2bspc += E1000_READ_REG(hw, E1000_O2BSPC);
+ adapter->stats.b2ospc += E1000_READ_REG(hw, E1000_B2OSPC);
+ adapter->stats.b2ogprc += E1000_READ_REG(hw, E1000_B2OGPRC);
+ }
+}
+
+static irqreturn_t igb_msix_other(int irq, void *data)
+{
+ struct igb_adapter *adapter = data;
+ struct e1000_hw *hw = &adapter->hw;
+ u32 icr = E1000_READ_REG(hw, E1000_ICR);
+ /* reading ICR causes bit 31 of EICR to be cleared */
+
+ if (icr & E1000_ICR_DRSTA)
+ schedule_work(&adapter->reset_task);
+
+ if (icr & E1000_ICR_DOUTSYNC) {
+ /* HW is reporting DMA is out of sync */
+ adapter->stats.doosync++;
+ /* The DMA Out of Sync is also indication of a spoof event
+ * in IOV mode. Check the Wrong VM Behavior register to
+ * see if it is really a spoof event. */
+ igb_check_wvbr(adapter);
+ }
+
+ /* Check for a mailbox event */
+ if (icr & E1000_ICR_VMMB)
+ igb_msg_task(adapter);
+
+ if (icr & E1000_ICR_LSC) {
+ hw->mac.get_link_status = 1;
+ /* guard against interrupt when we're going down */
+ if (!test_bit(__IGB_DOWN, &adapter->state))
+ mod_timer(&adapter->watchdog_timer, jiffies + 1);
+ }
+
+#ifdef HAVE_PTP_1588_CLOCK
+ if (icr & E1000_ICR_TS) {
+ u32 tsicr = E1000_READ_REG(hw, E1000_TSICR);
+
+ if (tsicr & E1000_TSICR_TXTS) {
+ /* acknowledge the interrupt */
+ E1000_WRITE_REG(hw, E1000_TSICR, E1000_TSICR_TXTS);
+ /* retrieve hardware timestamp */
+ schedule_work(&adapter->ptp_tx_work);
+ }
+ }
+#endif /* HAVE_PTP_1588_CLOCK */
+
+ /* Check for MDD event */
+ if (icr & E1000_ICR_MDDET)
+ igb_process_mdd_event(adapter);
+
+ E1000_WRITE_REG(hw, E1000_EIMS, adapter->eims_other);
+
+ return IRQ_HANDLED;
+}
+
+static void igb_write_itr(struct igb_q_vector *q_vector)
+{
+ struct igb_adapter *adapter = q_vector->adapter;
+ u32 itr_val = q_vector->itr_val & 0x7FFC;
+
+ if (!q_vector->set_itr)
+ return;
+
+ if (!itr_val)
+ itr_val = 0x4;
+
+ if (adapter->hw.mac.type == e1000_82575)
+ itr_val |= itr_val << 16;
+ else
+ itr_val |= E1000_EITR_CNT_IGNR;
+
+ writel(itr_val, q_vector->itr_register);
+ q_vector->set_itr = 0;
+}
+
+static irqreturn_t igb_msix_ring(int irq, void *data)
+{
+ struct igb_q_vector *q_vector = data;
+
+ /* Write the ITR value calculated from the previous interrupt. */
+ igb_write_itr(q_vector);
+
+ napi_schedule(&q_vector->napi);
+
+ return IRQ_HANDLED;
+}
+
+#ifdef IGB_DCA
+static void igb_update_tx_dca(struct igb_adapter *adapter,
+ struct igb_ring *tx_ring,
+ int cpu)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ u32 txctrl = dca3_get_tag(tx_ring->dev, cpu);
+
+ if (hw->mac.type != e1000_82575)
+ txctrl <<= E1000_DCA_TXCTRL_CPUID_SHIFT_82576;
+
+ /*
+ * We can enable relaxed ordering for reads, but not writes when
+ * DCA is enabled. This is due to a known issue in some chipsets
+ * which will cause the DCA tag to be cleared.
+ */
+ txctrl |= E1000_DCA_TXCTRL_DESC_RRO_EN |
+ E1000_DCA_TXCTRL_DATA_RRO_EN |
+ E1000_DCA_TXCTRL_DESC_DCA_EN;
+
+ E1000_WRITE_REG(hw, E1000_DCA_TXCTRL(tx_ring->reg_idx), txctrl);
+}
+
+static void igb_update_rx_dca(struct igb_adapter *adapter,
+ struct igb_ring *rx_ring,
+ int cpu)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ u32 rxctrl = dca3_get_tag(&adapter->pdev->dev, cpu);
+
+ if (hw->mac.type != e1000_82575)
+ rxctrl <<= E1000_DCA_RXCTRL_CPUID_SHIFT_82576;
+
+ /*
+ * We can enable relaxed ordering for reads, but not writes when
+ * DCA is enabled. This is due to a known issue in some chipsets
+ * which will cause the DCA tag to be cleared.
+ */
+ rxctrl |= E1000_DCA_RXCTRL_DESC_RRO_EN |
+ E1000_DCA_RXCTRL_DESC_DCA_EN;
+
+ E1000_WRITE_REG(hw, E1000_DCA_RXCTRL(rx_ring->reg_idx), rxctrl);
+}
+
+static void igb_update_dca(struct igb_q_vector *q_vector)
+{
+ struct igb_adapter *adapter = q_vector->adapter;
+ int cpu = get_cpu();
+
+ if (q_vector->cpu == cpu)
+ goto out_no_update;
+
+ if (q_vector->tx.ring)
+ igb_update_tx_dca(adapter, q_vector->tx.ring, cpu);
+
+ if (q_vector->rx.ring)
+ igb_update_rx_dca(adapter, q_vector->rx.ring, cpu);
+
+ q_vector->cpu = cpu;
+out_no_update:
+ put_cpu();
+}
+
+static void igb_setup_dca(struct igb_adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ int i;
+
+ if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
+ return;
+
+ /* Always use CB2 mode, difference is masked in the CB driver. */
+ E1000_WRITE_REG(hw, E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
+
+ for (i = 0; i < adapter->num_q_vectors; i++) {
+ adapter->q_vector[i]->cpu = -1;
+ igb_update_dca(adapter->q_vector[i]);
+ }
+}
+
+static int __igb_notify_dca(struct device *dev, void *data)
+{
+ struct net_device *netdev = dev_get_drvdata(dev);
+ struct igb_adapter *adapter = netdev_priv(netdev);
+ struct pci_dev *pdev = adapter->pdev;
+ struct e1000_hw *hw = &adapter->hw;
+ unsigned long event = *(unsigned long *)data;
+
+ switch (event) {
+ case DCA_PROVIDER_ADD:
+ /* if already enabled, don't do it again */
+ if (adapter->flags & IGB_FLAG_DCA_ENABLED)
+ break;
+ if (dca_add_requester(dev) == E1000_SUCCESS) {
+ adapter->flags |= IGB_FLAG_DCA_ENABLED;
+ dev_info(pci_dev_to_dev(pdev), "DCA enabled\n");
+ igb_setup_dca(adapter);
+ break;
+ }
+ /* Fall Through since DCA is disabled. */
+ case DCA_PROVIDER_REMOVE:
+ if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
+ /* without this a class_device is left
+ * hanging around in the sysfs model */
+ dca_remove_requester(dev);
+ dev_info(pci_dev_to_dev(pdev), "DCA disabled\n");
+ adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
+ E1000_WRITE_REG(hw, E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_DISABLE);
+ }
+ break;
+ }
+
+ return E1000_SUCCESS;
+}
+
+static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
+ void *p)
+{
+ int ret_val;
+
+ ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
+ __igb_notify_dca);
+
+ return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
+}
+#endif /* IGB_DCA */
+
+static int igb_vf_configure(struct igb_adapter *adapter, int vf)
+{
+ unsigned char mac_addr[ETH_ALEN];
+
+ random_ether_addr(mac_addr);
+ igb_set_vf_mac(adapter, vf, mac_addr);
+
+#ifdef IFLA_VF_MAX
+#ifdef HAVE_VF_SPOOFCHK_CONFIGURE
+ /* By default spoof check is enabled for all VFs */
+ adapter->vf_data[vf].spoofchk_enabled = true;
+#endif
+#endif
+
+ return true;
+}
+
+static void igb_ping_all_vfs(struct igb_adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ u32 ping;
+ int i;
+
+ for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
+ ping = E1000_PF_CONTROL_MSG;
+ if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
+ ping |= E1000_VT_MSGTYPE_CTS;
+ e1000_write_mbx(hw, &ping, 1, i);
+ }
+}
+
+/**
+ * igb_mta_set_ - Set multicast filter table address
+ * @adapter: pointer to the adapter structure
+ * @hash_value: determines the MTA register and bit to set
+ *
+ * The multicast table address is a register array of 32-bit registers.
+ * The hash_value is used to determine what register the bit is in, the
+ * current value is read, the new bit is OR'd in and the new value is
+ * written back into the register.
+ **/
+void igb_mta_set(struct igb_adapter *adapter, u32 hash_value)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ u32 hash_bit, hash_reg, mta;
+
+ /*
+ * The MTA is a register array of 32-bit registers. It is
+ * treated like an array of (32*mta_reg_count) bits. We want to
+ * set bit BitArray[hash_value]. So we figure out what register
+ * the bit is in, read it, OR in the new bit, then write
+ * back the new value. The (hw->mac.mta_reg_count - 1) serves as a
+ * mask to bits 31:5 of the hash value which gives us the
+ * register we're modifying. The hash bit within that register
+ * is determined by the lower 5 bits of the hash value.
+ */
+ hash_reg = (hash_value >> 5) & (hw->mac.mta_reg_count - 1);
+ hash_bit = hash_value & 0x1F;
+
+ mta = E1000_READ_REG_ARRAY(hw, E1000_MTA, hash_reg);
+
+ mta |= (1 << hash_bit);
+
+ E1000_WRITE_REG_ARRAY(hw, E1000_MTA, hash_reg, mta);
+ E1000_WRITE_FLUSH(hw);
+}
+
+static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
+{
+
+ struct e1000_hw *hw = &adapter->hw;
+ u32 vmolr = E1000_READ_REG(hw, E1000_VMOLR(vf));
+ struct vf_data_storage *vf_data = &adapter->vf_data[vf];
+
+ vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
+ IGB_VF_FLAG_MULTI_PROMISC);
+ vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
+
+#ifdef IGB_ENABLE_VF_PROMISC
+ if (*msgbuf & E1000_VF_SET_PROMISC_UNICAST) {
+ vmolr |= E1000_VMOLR_ROPE;
+ vf_data->flags |= IGB_VF_FLAG_UNI_PROMISC;
+ *msgbuf &= ~E1000_VF_SET_PROMISC_UNICAST;
+ }
+#endif
+ if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
+ vmolr |= E1000_VMOLR_MPME;
+ vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
+ *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
+ } else {
+ /*
+ * if we have hashes and we are clearing a multicast promisc
+ * flag we need to write the hashes to the MTA as this step
+ * was previously skipped
+ */
+ if (vf_data->num_vf_mc_hashes > 30) {
+ vmolr |= E1000_VMOLR_MPME;
+ } else if (vf_data->num_vf_mc_hashes) {
+ int j;
+ vmolr |= E1000_VMOLR_ROMPE;
+ for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
+ igb_mta_set(adapter, vf_data->vf_mc_hashes[j]);
+ }
+ }
+
+ E1000_WRITE_REG(hw, E1000_VMOLR(vf), vmolr);
+
+ /* there are flags left unprocessed, likely not supported */
+ if (*msgbuf & E1000_VT_MSGINFO_MASK)
+ return -EINVAL;
+
+ return 0;
+
+}
+
+static int igb_set_vf_multicasts(struct igb_adapter *adapter,
+ u32 *msgbuf, u32 vf)
+{
+ int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
+ u16 *hash_list = (u16 *)&msgbuf[1];
+ struct vf_data_storage *vf_data = &adapter->vf_data[vf];
+ int i;
+
+ /* salt away the number of multicast addresses assigned
+ * to this VF for later use to restore when the PF multi cast
+ * list changes
+ */
+ vf_data->num_vf_mc_hashes = n;
+
+ /* only up to 30 hash values supported */
+ if (n > 30)
+ n = 30;
+
+ /* store the hashes for later use */
+ for (i = 0; i < n; i++)
+ vf_data->vf_mc_hashes[i] = hash_list[i];
+
+ /* Flush and reset the mta with the new values */
+ igb_set_rx_mode(adapter->netdev);
+
+ return 0;
+}
+
+static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ struct vf_data_storage *vf_data;
+ int i, j;
+
+ for (i = 0; i < adapter->vfs_allocated_count; i++) {
+ u32 vmolr = E1000_READ_REG(hw, E1000_VMOLR(i));
+ vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
+
+ vf_data = &adapter->vf_data[i];
+
+ if ((vf_data->num_vf_mc_hashes > 30) ||
+ (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
+ vmolr |= E1000_VMOLR_MPME;
+ } else if (vf_data->num_vf_mc_hashes) {
+ vmolr |= E1000_VMOLR_ROMPE;
+ for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
+ igb_mta_set(adapter, vf_data->vf_mc_hashes[j]);
+ }
+ E1000_WRITE_REG(hw, E1000_VMOLR(i), vmolr);
+ }
+}
+
+static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ u32 pool_mask, reg, vid;
+ u16 vlan_default;
+ int i;
+
+ pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
+
+ /* Find the vlan filter for this id */
+ for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
+ reg = E1000_READ_REG(hw, E1000_VLVF(i));
+
+ /* remove the vf from the pool */
+ reg &= ~pool_mask;
+
+ /* if pool is empty then remove entry from vfta */
+ if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
+ (reg & E1000_VLVF_VLANID_ENABLE)) {
+ reg = 0;
+ vid = reg & E1000_VLVF_VLANID_MASK;
+ igb_vfta_set(adapter, vid, FALSE);
+ }
+
+ E1000_WRITE_REG(hw, E1000_VLVF(i), reg);
+ }
+
+ adapter->vf_data[vf].vlans_enabled = 0;
+
+ vlan_default = adapter->vf_data[vf].default_vf_vlan_id;
+ if (vlan_default)
+ igb_vlvf_set(adapter, vlan_default, true, vf);
+}
+
+s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ u32 reg, i;
+
+ /* The vlvf table only exists on 82576 hardware and newer */
+ if (hw->mac.type < e1000_82576)
+ return -1;
+
+ /* we only need to do this if VMDq is enabled */
+ if (!adapter->vmdq_pools)
+ return -1;
+
+ /* Find the vlan filter for this id */
+ for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
+ reg = E1000_READ_REG(hw, E1000_VLVF(i));
+ if ((reg & E1000_VLVF_VLANID_ENABLE) &&
+ vid == (reg & E1000_VLVF_VLANID_MASK))
+ break;
+ }
+
+ if (add) {
+ if (i == E1000_VLVF_ARRAY_SIZE) {
+ /* Did not find a matching VLAN ID entry that was
+ * enabled. Search for a free filter entry, i.e.
+ * one without the enable bit set
+ */
+ for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
+ reg = E1000_READ_REG(hw, E1000_VLVF(i));
+ if (!(reg & E1000_VLVF_VLANID_ENABLE))
+ break;
+ }
+ }
+ if (i < E1000_VLVF_ARRAY_SIZE) {
+ /* Found an enabled/available entry */
+ reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
+
+ /* if !enabled we need to set this up in vfta */
+ if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
+ /* add VID to filter table */
+ igb_vfta_set(adapter, vid, TRUE);
+ reg |= E1000_VLVF_VLANID_ENABLE;
+ }
+ reg &= ~E1000_VLVF_VLANID_MASK;
+ reg |= vid;
+ E1000_WRITE_REG(hw, E1000_VLVF(i), reg);
+
+ /* do not modify RLPML for PF devices */
+ if (vf >= adapter->vfs_allocated_count)
+ return E1000_SUCCESS;
+
+ if (!adapter->vf_data[vf].vlans_enabled) {
+ u32 size;
+ reg = E1000_READ_REG(hw, E1000_VMOLR(vf));
+ size = reg & E1000_VMOLR_RLPML_MASK;
+ size += 4;
+ reg &= ~E1000_VMOLR_RLPML_MASK;
+ reg |= size;
+ E1000_WRITE_REG(hw, E1000_VMOLR(vf), reg);
+ }
+
+ adapter->vf_data[vf].vlans_enabled++;
+ }
+ } else {
+ if (i < E1000_VLVF_ARRAY_SIZE) {
+ /* remove vf from the pool */
+ reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
+ /* if pool is empty then remove entry from vfta */
+ if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
+ reg = 0;
+ igb_vfta_set(adapter, vid, FALSE);
+ }
+ E1000_WRITE_REG(hw, E1000_VLVF(i), reg);
+
+ /* do not modify RLPML for PF devices */
+ if (vf >= adapter->vfs_allocated_count)
+ return E1000_SUCCESS;
+
+ adapter->vf_data[vf].vlans_enabled--;
+ if (!adapter->vf_data[vf].vlans_enabled) {
+ u32 size;
+ reg = E1000_READ_REG(hw, E1000_VMOLR(vf));
+ size = reg & E1000_VMOLR_RLPML_MASK;
+ size -= 4;
+ reg &= ~E1000_VMOLR_RLPML_MASK;
+ reg |= size;
+ E1000_WRITE_REG(hw, E1000_VMOLR(vf), reg);
+ }
+ }
+ }
+ return E1000_SUCCESS;
+}
+
+#ifdef IFLA_VF_MAX
+static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
+{
+ struct e1000_hw *hw = &adapter->hw;
+
+ if (vid)
+ E1000_WRITE_REG(hw, E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
+ else
+ E1000_WRITE_REG(hw, E1000_VMVIR(vf), 0);
+}
+
+static int igb_ndo_set_vf_vlan(struct net_device *netdev,
+ int vf, u16 vlan, u8 qos)
+{
+ int err = 0;
+ struct igb_adapter *adapter = netdev_priv(netdev);
+
+ /* VLAN IDs accepted range 0-4094 */
+ if ((vf >= adapter->vfs_allocated_count) || (vlan > VLAN_VID_MASK-1) || (qos > 7))
+ return -EINVAL;
+ if (vlan || qos) {
+ err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
+ if (err)
+ goto out;
+ igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
+ igb_set_vmolr(adapter, vf, !vlan);
+ adapter->vf_data[vf].pf_vlan = vlan;
+ adapter->vf_data[vf].pf_qos = qos;
+ igb_set_vf_vlan_strip(adapter, vf, true);
+ dev_info(&adapter->pdev->dev,
+ "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
+ if (test_bit(__IGB_DOWN, &adapter->state)) {
+ dev_warn(&adapter->pdev->dev,
+ "The VF VLAN has been set,"
+ " but the PF device is not up.\n");
+ dev_warn(&adapter->pdev->dev,
+ "Bring the PF device up before"
+ " attempting to use the VF device.\n");
+ }
+ } else {
+ if (adapter->vf_data[vf].pf_vlan)
+ dev_info(&adapter->pdev->dev,
+ "Clearing VLAN on VF %d\n", vf);
+ igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
+ false, vf);
+ igb_set_vmvir(adapter, vlan, vf);
+ igb_set_vmolr(adapter, vf, true);
+ igb_set_vf_vlan_strip(adapter, vf, false);
+ adapter->vf_data[vf].pf_vlan = 0;
+ adapter->vf_data[vf].pf_qos = 0;
+ }
+out:
+ return err;
+}
+
+#ifdef HAVE_VF_SPOOFCHK_CONFIGURE
+static int igb_ndo_set_vf_spoofchk(struct net_device *netdev, int vf,
+ bool setting)
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+ struct e1000_hw *hw = &adapter->hw;
+ u32 dtxswc, reg_offset;
+
+ if (!adapter->vfs_allocated_count)
+ return -EOPNOTSUPP;
+
+ if (vf >= adapter->vfs_allocated_count)
+ return -EINVAL;
+
+ reg_offset = (hw->mac.type == e1000_82576) ? E1000_DTXSWC : E1000_TXSWC;
+ dtxswc = E1000_READ_REG(hw, reg_offset);
+ if (setting)
+ dtxswc |= ((1 << vf) |
+ (1 << (vf + E1000_DTXSWC_VLAN_SPOOF_SHIFT)));
+ else
+ dtxswc &= ~((1 << vf) |
+ (1 << (vf + E1000_DTXSWC_VLAN_SPOOF_SHIFT)));
+ E1000_WRITE_REG(hw, reg_offset, dtxswc);
+
+ adapter->vf_data[vf].spoofchk_enabled = setting;
+ return E1000_SUCCESS;
+}
+#endif /* HAVE_VF_SPOOFCHK_CONFIGURE */
+#endif /* IFLA_VF_MAX */
+
+static int igb_find_vlvf_entry(struct igb_adapter *adapter, int vid)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ int i;
+ u32 reg;
+
+ /* Find the vlan filter for this id */
+ for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
+ reg = E1000_READ_REG(hw, E1000_VLVF(i));
+ if ((reg & E1000_VLVF_VLANID_ENABLE) &&
+ vid == (reg & E1000_VLVF_VLANID_MASK))
+ break;
+ }
+
+ if (i >= E1000_VLVF_ARRAY_SIZE)
+ i = -1;
+
+ return i;
+}
+
+static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
+ int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
+ int err = 0;
+
+ if (vid)
+ igb_set_vf_vlan_strip(adapter, vf, true);
+ else
+ igb_set_vf_vlan_strip(adapter, vf, false);
+
+ /* If in promiscuous mode we need to make sure the PF also has
+ * the VLAN filter set.
+ */
+ if (add && (adapter->netdev->flags & IFF_PROMISC))
+ err = igb_vlvf_set(adapter, vid, add,
+ adapter->vfs_allocated_count);
+ if (err)
+ goto out;
+
+ err = igb_vlvf_set(adapter, vid, add, vf);
+
+ if (err)
+ goto out;
+
+ /* Go through all the checks to see if the VLAN filter should
+ * be wiped completely.
+ */
+ if (!add && (adapter->netdev->flags & IFF_PROMISC)) {
+ u32 vlvf, bits;
+
+ int regndx = igb_find_vlvf_entry(adapter, vid);
+ if (regndx < 0)
+ goto out;
+ /* See if any other pools are set for this VLAN filter
+ * entry other than the PF.
+ */
+ vlvf = bits = E1000_READ_REG(hw, E1000_VLVF(regndx));
+ bits &= 1 << (E1000_VLVF_POOLSEL_SHIFT +
+ adapter->vfs_allocated_count);
+ /* If the filter was removed then ensure PF pool bit
+ * is cleared if the PF only added itself to the pool
+ * because the PF is in promiscuous mode.
+ */
+ if ((vlvf & VLAN_VID_MASK) == vid &&
+#ifndef HAVE_VLAN_RX_REGISTER
+ !test_bit(vid, adapter->active_vlans) &&
+#endif
+ !bits)
+ igb_vlvf_set(adapter, vid, add,
+ adapter->vfs_allocated_count);
+ }
+
+out:
+ return err;
+}
+
+static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
+{
+ struct e1000_hw *hw = &adapter->hw;
+
+ /* clear flags except flag that the PF has set the MAC */
+ adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
+ adapter->vf_data[vf].last_nack = jiffies;
+
+ /* reset offloads to defaults */
+ igb_set_vmolr(adapter, vf, true);
+
+ /* reset vlans for device */
+ igb_clear_vf_vfta(adapter, vf);
+#ifdef IFLA_VF_MAX
+ if (adapter->vf_data[vf].pf_vlan)
+ igb_ndo_set_vf_vlan(adapter->netdev, vf,
+ adapter->vf_data[vf].pf_vlan,
+ adapter->vf_data[vf].pf_qos);
+ else
+ igb_clear_vf_vfta(adapter, vf);
+#endif
+
+ /* reset multicast table array for vf */
+ adapter->vf_data[vf].num_vf_mc_hashes = 0;
+
+ /* Flush and reset the mta with the new values */
+ igb_set_rx_mode(adapter->netdev);
+
+ /*
+ * Reset the VFs TDWBAL and TDWBAH registers which are not
+ * cleared by a VFLR
+ */
+ E1000_WRITE_REG(hw, E1000_TDWBAH(vf), 0);
+ E1000_WRITE_REG(hw, E1000_TDWBAL(vf), 0);
+ if (hw->mac.type == e1000_82576) {
+ E1000_WRITE_REG(hw, E1000_TDWBAH(IGB_MAX_VF_FUNCTIONS + vf), 0);
+ E1000_WRITE_REG(hw, E1000_TDWBAL(IGB_MAX_VF_FUNCTIONS + vf), 0);
+ }
+}
+
+static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
+{
+ unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
+
+ /* generate a new mac address as we were hotplug removed/added */
+ if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
+ random_ether_addr(vf_mac);
+
+ /* process remaining reset events */
+ igb_vf_reset(adapter, vf);
+}
+
+static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
+ u32 reg, msgbuf[3];
+ u8 *addr = (u8 *)(&msgbuf[1]);
+
+ /* process all the same items cleared in a function level reset */
+ igb_vf_reset(adapter, vf);
+
+ /* set vf mac address */
+ igb_del_mac_filter(adapter, vf_mac, vf);
+ igb_add_mac_filter(adapter, vf_mac, vf);
+
+ /* enable transmit and receive for vf */
+ reg = E1000_READ_REG(hw, E1000_VFTE);
+ E1000_WRITE_REG(hw, E1000_VFTE, reg | (1 << vf));
+ reg = E1000_READ_REG(hw, E1000_VFRE);
+ E1000_WRITE_REG(hw, E1000_VFRE, reg | (1 << vf));
+
+ adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
+
+ /* reply to reset with ack and vf mac address */
+ msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
+ memcpy(addr, vf_mac, 6);
+ e1000_write_mbx(hw, msgbuf, 3, vf);
+}
+
+static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
+{
+ /*
+ * The VF MAC Address is stored in a packed array of bytes
+ * starting at the second 32 bit word of the msg array
+ */
+ unsigned char *addr = (unsigned char *)&msg[1];
+ int err = -1;
+
+ if (is_valid_ether_addr(addr))
+ err = igb_set_vf_mac(adapter, vf, addr);
+
+ return err;
+}
+
+static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ struct vf_data_storage *vf_data = &adapter->vf_data[vf];
+ u32 msg = E1000_VT_MSGTYPE_NACK;
+
+ /* if device isn't clear to send it shouldn't be reading either */
+ if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
+ time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
+ e1000_write_mbx(hw, &msg, 1, vf);
+ vf_data->last_nack = jiffies;
+ }
+}
+
+static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
+{
+ struct pci_dev *pdev = adapter->pdev;
+ u32 msgbuf[E1000_VFMAILBOX_SIZE];
+ struct e1000_hw *hw = &adapter->hw;
+ struct vf_data_storage *vf_data = &adapter->vf_data[vf];
+ s32 retval;
+
+ retval = e1000_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
+
+ if (retval) {
+ dev_err(pci_dev_to_dev(pdev), "Error receiving message from VF\n");
+ return;
+ }
+
+ /* this is a message we already processed, do nothing */
+ if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
+ return;
+
+ /*
+ * until the vf completes a reset it should not be
+ * allowed to start any configuration.
+ */
+
+ if (msgbuf[0] == E1000_VF_RESET) {
+ igb_vf_reset_msg(adapter, vf);
+ return;
+ }
+
+ if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
+ msgbuf[0] = E1000_VT_MSGTYPE_NACK;
+ if (time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
+ e1000_write_mbx(hw, msgbuf, 1, vf);
+ vf_data->last_nack = jiffies;
+ }
+ return;
+ }
+
+ switch ((msgbuf[0] & 0xFFFF)) {
+ case E1000_VF_SET_MAC_ADDR:
+ retval = -EINVAL;
+#ifndef IGB_DISABLE_VF_MAC_SET
+ if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
+ retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
+ else
+ DPRINTK(DRV, INFO,
+ "VF %d attempted to override administratively "
+ "set MAC address\nReload the VF driver to "
+ "resume operations\n", vf);
+#endif
+ break;
+ case E1000_VF_SET_PROMISC:
+ retval = igb_set_vf_promisc(adapter, msgbuf, vf);
+ break;
+ case E1000_VF_SET_MULTICAST:
+ retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
+ break;
+ case E1000_VF_SET_LPE:
+ retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
+ break;
+ case E1000_VF_SET_VLAN:
+ retval = -1;
+#ifdef IFLA_VF_MAX
+ if (vf_data->pf_vlan)
+ DPRINTK(DRV, INFO,
+ "VF %d attempted to override administratively "
+ "set VLAN tag\nReload the VF driver to "
+ "resume operations\n", vf);
+ else
+#endif
+ retval = igb_set_vf_vlan(adapter, msgbuf, vf);
+ break;
+ default:
+ dev_err(pci_dev_to_dev(pdev), "Unhandled Msg %08x\n", msgbuf[0]);
+ retval = -E1000_ERR_MBX;
+ break;
+ }
+
+ /* notify the VF of the results of what it sent us */
+ if (retval)
+ msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
+ else
+ msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
+
+ msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
+
+ e1000_write_mbx(hw, msgbuf, 1, vf);
+}
+
+static void igb_msg_task(struct igb_adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ u32 vf;
+
+ for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
+ /* process any reset requests */
+ if (!e1000_check_for_rst(hw, vf))
+ igb_vf_reset_event(adapter, vf);
+
+ /* process any messages pending */
+ if (!e1000_check_for_msg(hw, vf))
+ igb_rcv_msg_from_vf(adapter, vf);
+
+ /* process any acks */
+ if (!e1000_check_for_ack(hw, vf))
+ igb_rcv_ack_from_vf(adapter, vf);
+ }
+}
+
+/**
+ * igb_set_uta - Set unicast filter table address
+ * @adapter: board private structure
+ *
+ * The unicast table address is a register array of 32-bit registers.
+ * The table is meant to be used in a way similar to how the MTA is used
+ * however due to certain limitations in the hardware it is necessary to
+ * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
+ * enable bit to allow vlan tag stripping when promiscuous mode is enabled
+ **/
+static void igb_set_uta(struct igb_adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ int i;
+
+ /* The UTA table only exists on 82576 hardware and newer */
+ if (hw->mac.type < e1000_82576)
+ return;
+
+ /* we only need to do this if VMDq is enabled */
+ if (!adapter->vmdq_pools)
+ return;
+
+ for (i = 0; i < hw->mac.uta_reg_count; i++)
+ E1000_WRITE_REG_ARRAY(hw, E1000_UTA, i, ~0);
+}
+
+/**
+ * igb_intr_msi - Interrupt Handler
+ * @irq: interrupt number
+ * @data: pointer to a network interface device structure
+ **/
+static irqreturn_t igb_intr_msi(int irq, void *data)
+{
+ struct igb_adapter *adapter = data;
+ struct igb_q_vector *q_vector = adapter->q_vector[0];
+ struct e1000_hw *hw = &adapter->hw;
+ /* read ICR disables interrupts using IAM */
+ u32 icr = E1000_READ_REG(hw, E1000_ICR);
+
+ igb_write_itr(q_vector);
+
+ if (icr & E1000_ICR_DRSTA)
+ schedule_work(&adapter->reset_task);
+
+ if (icr & E1000_ICR_DOUTSYNC) {
+ /* HW is reporting DMA is out of sync */
+ adapter->stats.doosync++;
+ }
+
+ if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
+ hw->mac.get_link_status = 1;
+ if (!test_bit(__IGB_DOWN, &adapter->state))
+ mod_timer(&adapter->watchdog_timer, jiffies + 1);
+ }
+
+#ifdef HAVE_PTP_1588_CLOCK
+ if (icr & E1000_ICR_TS) {
+ u32 tsicr = E1000_READ_REG(hw, E1000_TSICR);
+
+ if (tsicr & E1000_TSICR_TXTS) {
+ /* acknowledge the interrupt */
+ E1000_WRITE_REG(hw, E1000_TSICR, E1000_TSICR_TXTS);
+ /* retrieve hardware timestamp */
+ schedule_work(&adapter->ptp_tx_work);
+ }
+ }
+#endif /* HAVE_PTP_1588_CLOCK */
+
+ napi_schedule(&q_vector->napi);
+
+ return IRQ_HANDLED;
+}
+
+/**
+ * igb_intr - Legacy Interrupt Handler
+ * @irq: interrupt number
+ * @data: pointer to a network interface device structure
+ **/
+static irqreturn_t igb_intr(int irq, void *data)
+{
+ struct igb_adapter *adapter = data;
+ struct igb_q_vector *q_vector = adapter->q_vector[0];
+ struct e1000_hw *hw = &adapter->hw;
+ /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No
+ * need for the IMC write */
+ u32 icr = E1000_READ_REG(hw, E1000_ICR);
+
+ /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
+ * not set, then the adapter didn't send an interrupt */
+ if (!(icr & E1000_ICR_INT_ASSERTED))
+ return IRQ_NONE;
+
+ igb_write_itr(q_vector);
+
+ if (icr & E1000_ICR_DRSTA)
+ schedule_work(&adapter->reset_task);
+
+ if (icr & E1000_ICR_DOUTSYNC) {
+ /* HW is reporting DMA is out of sync */
+ adapter->stats.doosync++;
+ }
+
+ if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
+ hw->mac.get_link_status = 1;
+ /* guard against interrupt when we're going down */
+ if (!test_bit(__IGB_DOWN, &adapter->state))
+ mod_timer(&adapter->watchdog_timer, jiffies + 1);
+ }
+
+#ifdef HAVE_PTP_1588_CLOCK
+ if (icr & E1000_ICR_TS) {
+ u32 tsicr = E1000_READ_REG(hw, E1000_TSICR);
+
+ if (tsicr & E1000_TSICR_TXTS) {
+ /* acknowledge the interrupt */
+ E1000_WRITE_REG(hw, E1000_TSICR, E1000_TSICR_TXTS);
+ /* retrieve hardware timestamp */
+ schedule_work(&adapter->ptp_tx_work);
+ }
+ }
+#endif /* HAVE_PTP_1588_CLOCK */
+
+ napi_schedule(&q_vector->napi);
+
+ return IRQ_HANDLED;
+}
+
+void igb_ring_irq_enable(struct igb_q_vector *q_vector)
+{
+ struct igb_adapter *adapter = q_vector->adapter;
+ struct e1000_hw *hw = &adapter->hw;
+
+ if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
+ (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
+ if ((adapter->num_q_vectors == 1) && !adapter->vf_data)
+ igb_set_itr(q_vector);
+ else
+ igb_update_ring_itr(q_vector);
+ }
+
+ if (!test_bit(__IGB_DOWN, &adapter->state)) {
+ if (adapter->msix_entries)
+ E1000_WRITE_REG(hw, E1000_EIMS, q_vector->eims_value);
+ else
+ igb_irq_enable(adapter);
+ }
+}
+
+/**
+ * igb_poll - NAPI Rx polling callback
+ * @napi: napi polling structure
+ * @budget: count of how many packets we should handle
+ **/
+static int igb_poll(struct napi_struct *napi, int budget)
+{
+ struct igb_q_vector *q_vector = container_of(napi, struct igb_q_vector, napi);
+ bool clean_complete = true;
+
+#ifdef IGB_DCA
+ if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
+ igb_update_dca(q_vector);
+#endif
+ if (q_vector->tx.ring)
+ clean_complete = igb_clean_tx_irq(q_vector);
+
+ if (q_vector->rx.ring)
+ clean_complete &= igb_clean_rx_irq(q_vector, budget);
+
+#ifndef HAVE_NETDEV_NAPI_LIST
+ /* if netdev is disabled we need to stop polling */
+ if (!netif_running(q_vector->adapter->netdev))
+ clean_complete = true;
+
+#endif
+ /* If all work not completed, return budget and keep polling */
+ if (!clean_complete)
+ return budget;
+
+ /* If not enough Rx work done, exit the polling mode */
+ napi_complete(napi);
+ igb_ring_irq_enable(q_vector);
+
+ return 0;
+}
+
+/**
+ * igb_clean_tx_irq - Reclaim resources after transmit completes
+ * @q_vector: pointer to q_vector containing needed info
+ * returns TRUE if ring is completely cleaned
+ **/
+static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
+{
+ struct igb_adapter *adapter = q_vector->adapter;
+ struct igb_ring *tx_ring = q_vector->tx.ring;
+ struct igb_tx_buffer *tx_buffer;
+ union e1000_adv_tx_desc *tx_desc;
+ unsigned int total_bytes = 0, total_packets = 0;
+ unsigned int budget = q_vector->tx.work_limit;
+ unsigned int i = tx_ring->next_to_clean;
+
+ if (test_bit(__IGB_DOWN, &adapter->state))
+ return true;
+
+ tx_buffer = &tx_ring->tx_buffer_info[i];
+ tx_desc = IGB_TX_DESC(tx_ring, i);
+ i -= tx_ring->count;
+
+ do {
+ union e1000_adv_tx_desc *eop_desc = tx_buffer->next_to_watch;
+
+ /* if next_to_watch is not set then there is no work pending */
+ if (!eop_desc)
+ break;
+
+ /* prevent any other reads prior to eop_desc */
+ read_barrier_depends();
+
+ /* if DD is not set pending work has not been completed */
+ if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
+ break;
+
+ /* clear next_to_watch to prevent false hangs */
+ tx_buffer->next_to_watch = NULL;
+
+ /* update the statistics for this packet */
+ total_bytes += tx_buffer->bytecount;
+ total_packets += tx_buffer->gso_segs;
+
+ /* free the skb */
+ dev_kfree_skb_any(tx_buffer->skb);
+
+ /* unmap skb header data */
+ dma_unmap_single(tx_ring->dev,
+ dma_unmap_addr(tx_buffer, dma),
+ dma_unmap_len(tx_buffer, len),
+ DMA_TO_DEVICE);
+
+ /* clear tx_buffer data */
+ tx_buffer->skb = NULL;
+ dma_unmap_len_set(tx_buffer, len, 0);
+
+ /* clear last DMA location and unmap remaining buffers */
+ while (tx_desc != eop_desc) {
+ tx_buffer++;
+ tx_desc++;
+ i++;
+ if (unlikely(!i)) {
+ i -= tx_ring->count;
+ tx_buffer = tx_ring->tx_buffer_info;
+ tx_desc = IGB_TX_DESC(tx_ring, 0);
+ }
+
+ /* unmap any remaining paged data */
+ if (dma_unmap_len(tx_buffer, len)) {
+ dma_unmap_page(tx_ring->dev,
+ dma_unmap_addr(tx_buffer, dma),
+ dma_unmap_len(tx_buffer, len),
+ DMA_TO_DEVICE);
+ dma_unmap_len_set(tx_buffer, len, 0);
+ }
+ }
+
+ /* move us one more past the eop_desc for start of next pkt */
+ tx_buffer++;
+ tx_desc++;
+ i++;
+ if (unlikely(!i)) {
+ i -= tx_ring->count;
+ tx_buffer = tx_ring->tx_buffer_info;
+ tx_desc = IGB_TX_DESC(tx_ring, 0);
+ }
+
+ /* issue prefetch for next Tx descriptor */
+ prefetch(tx_desc);
+
+ /* update budget accounting */
+ budget--;
+ } while (likely(budget));
+
+ netdev_tx_completed_queue(txring_txq(tx_ring),
+ total_packets, total_bytes);
+
+ i += tx_ring->count;
+ tx_ring->next_to_clean = i;
+ tx_ring->tx_stats.bytes += total_bytes;
+ tx_ring->tx_stats.packets += total_packets;
+ q_vector->tx.total_bytes += total_bytes;
+ q_vector->tx.total_packets += total_packets;
+
+#ifdef DEBUG
+ if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags) &&
+ !(adapter->disable_hw_reset && adapter->tx_hang_detected)) {
+#else
+ if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
+#endif
+ struct e1000_hw *hw = &adapter->hw;
+
+ /* Detect a transmit hang in hardware, this serializes the
+ * check with the clearing of time_stamp and movement of i */
+ clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
+ if (tx_buffer->next_to_watch &&
+ time_after(jiffies, tx_buffer->time_stamp +
+ (adapter->tx_timeout_factor * HZ))
+ && !(E1000_READ_REG(hw, E1000_STATUS) &
+ E1000_STATUS_TXOFF)) {
+
+ /* detected Tx unit hang */
+#ifdef DEBUG
+ adapter->tx_hang_detected = TRUE;
+ if (adapter->disable_hw_reset) {
+ DPRINTK(DRV, WARNING,
+ "Deactivating netdev watchdog timer\n");
+ if (del_timer(&netdev_ring(tx_ring)->watchdog_timer))
+ dev_put(netdev_ring(tx_ring));
+#ifndef HAVE_NET_DEVICE_OPS
+ netdev_ring(tx_ring)->tx_timeout = NULL;
+#endif
+ }
+#endif /* DEBUG */
+ dev_err(tx_ring->dev,
+ "Detected Tx Unit Hang\n"
+ " Tx Queue <%d>\n"
+ " TDH <%x>\n"
+ " TDT <%x>\n"
+ " next_to_use <%x>\n"
+ " next_to_clean <%x>\n"
+ "buffer_info[next_to_clean]\n"
+ " time_stamp <%lx>\n"
+ " next_to_watch <%p>\n"
+ " jiffies <%lx>\n"
+ " desc.status <%x>\n",
+ tx_ring->queue_index,
+ E1000_READ_REG(hw, E1000_TDH(tx_ring->reg_idx)),
+ readl(tx_ring->tail),
+ tx_ring->next_to_use,
+ tx_ring->next_to_clean,
+ tx_buffer->time_stamp,
+ tx_buffer->next_to_watch,
+ jiffies,
+ tx_buffer->next_to_watch->wb.status);
+ if (netif_is_multiqueue(netdev_ring(tx_ring)))
+ netif_stop_subqueue(netdev_ring(tx_ring),
+ ring_queue_index(tx_ring));
+ else
+ netif_stop_queue(netdev_ring(tx_ring));
+
+ /* we are about to reset, no point in enabling stuff */
+ return true;
+ }
+ }
+
+#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
+ if (unlikely(total_packets &&
+ netif_carrier_ok(netdev_ring(tx_ring)) &&
+ igb_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD)) {
+ /* Make sure that anybody stopping the queue after this
+ * sees the new next_to_clean.
+ */
+ smp_mb();
+ if (netif_is_multiqueue(netdev_ring(tx_ring))) {
+ if (__netif_subqueue_stopped(netdev_ring(tx_ring),
+ ring_queue_index(tx_ring)) &&
+ !(test_bit(__IGB_DOWN, &adapter->state))) {
+ netif_wake_subqueue(netdev_ring(tx_ring),
+ ring_queue_index(tx_ring));
+ tx_ring->tx_stats.restart_queue++;
+ }
+ } else {
+ if (netif_queue_stopped(netdev_ring(tx_ring)) &&
+ !(test_bit(__IGB_DOWN, &adapter->state))) {
+ netif_wake_queue(netdev_ring(tx_ring));
+ tx_ring->tx_stats.restart_queue++;
+ }
+ }
+ }
+
+ return !!budget;
+}
+
+#ifdef HAVE_VLAN_RX_REGISTER
+/**
+ * igb_receive_skb - helper function to handle rx indications
+ * @q_vector: structure containing interrupt and ring information
+ * @skb: packet to send up
+ **/
+static void igb_receive_skb(struct igb_q_vector *q_vector,
+ struct sk_buff *skb)
+{
+ struct vlan_group **vlgrp = netdev_priv(skb->dev);
+
+ if (IGB_CB(skb)->vid) {
+ if (*vlgrp) {
+ vlan_gro_receive(&q_vector->napi, *vlgrp,
+ IGB_CB(skb)->vid, skb);
+ } else {
+ dev_kfree_skb_any(skb);
+ }
+ } else {
+ napi_gro_receive(&q_vector->napi, skb);
+ }
+}
+
+#endif /* HAVE_VLAN_RX_REGISTER */
+#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
+/**
+ * igb_reuse_rx_page - page flip buffer and store it back on the ring
+ * @rx_ring: rx descriptor ring to store buffers on
+ * @old_buff: donor buffer to have page reused
+ *
+ * Synchronizes page for reuse by the adapter
+ **/
+static void igb_reuse_rx_page(struct igb_ring *rx_ring,
+ struct igb_rx_buffer *old_buff)
+{
+ struct igb_rx_buffer *new_buff;
+ u16 nta = rx_ring->next_to_alloc;
+
+ new_buff = &rx_ring->rx_buffer_info[nta];
+
+ /* update, and store next to alloc */
+ nta++;
+ rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
+
+ /* transfer page from old buffer to new buffer */
+ memcpy(new_buff, old_buff, sizeof(struct igb_rx_buffer));
+
+ /* sync the buffer for use by the device */
+ dma_sync_single_range_for_device(rx_ring->dev, old_buff->dma,
+ old_buff->page_offset,
+ IGB_RX_BUFSZ,
+ DMA_FROM_DEVICE);
+}
+
+static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer,
+ struct page *page,
+ unsigned int truesize)
+{
+ /* avoid re-using remote pages */
+ if (unlikely(page_to_nid(page) != numa_node_id()))
+ return false;
+
+#if (PAGE_SIZE < 8192)
+ /* if we are only owner of page we can reuse it */
+ if (unlikely(page_count(page) != 1))
+ return false;
+
+ /* flip page offset to other buffer */
+ rx_buffer->page_offset ^= IGB_RX_BUFSZ;
+
+#else
+ /* move offset up to the next cache line */
+ rx_buffer->page_offset += truesize;
+
+ if (rx_buffer->page_offset > (PAGE_SIZE - IGB_RX_BUFSZ))
+ return false;
+#endif
+
+ /* bump ref count on page before it is given to the stack */
+ get_page(page);
+
+ return true;
+}
+
+/**
+ * igb_add_rx_frag - Add contents of Rx buffer to sk_buff
+ * @rx_ring: rx descriptor ring to transact packets on
+ * @rx_buffer: buffer containing page to add
+ * @rx_desc: descriptor containing length of buffer written by hardware
+ * @skb: sk_buff to place the data into
+ *
+ * This function will add the data contained in rx_buffer->page to the skb.
+ * This is done either through a direct copy if the data in the buffer is
+ * less than the skb header size, otherwise it will just attach the page as
+ * a frag to the skb.
+ *
+ * The function will then update the page offset if necessary and return
+ * true if the buffer can be reused by the adapter.
+ **/
+static bool igb_add_rx_frag(struct igb_ring *rx_ring,
+ struct igb_rx_buffer *rx_buffer,
+ union e1000_adv_rx_desc *rx_desc,
+ struct sk_buff *skb)
+{
+ struct page *page = rx_buffer->page;
+ unsigned int size = le16_to_cpu(rx_desc->wb.upper.length);
+#if (PAGE_SIZE < 8192)
+ unsigned int truesize = IGB_RX_BUFSZ;
+#else
+ unsigned int truesize = ALIGN(size, L1_CACHE_BYTES);
+#endif
+
+ if ((size <= IGB_RX_HDR_LEN) && !skb_is_nonlinear(skb)) {
+ unsigned char *va = page_address(page) + rx_buffer->page_offset;
+
+#ifdef HAVE_PTP_1588_CLOCK
+ if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
+ igb_ptp_rx_pktstamp(rx_ring->q_vector, va, skb);
+ va += IGB_TS_HDR_LEN;
+ size -= IGB_TS_HDR_LEN;
+ }
+#endif /* HAVE_PTP_1588_CLOCK */
+
+ memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));
+
+ /* we can reuse buffer as-is, just make sure it is local */
+ if (likely(page_to_nid(page) == numa_node_id()))
+ return true;
+
+ /* this page cannot be reused so discard it */
+ put_page(page);
+ return false;
+ }
+
+ skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
+ rx_buffer->page_offset, size, truesize);
+
+ return igb_can_reuse_rx_page(rx_buffer, page, truesize);
+}
+
+static struct sk_buff *igb_fetch_rx_buffer(struct igb_ring *rx_ring,
+ union e1000_adv_rx_desc *rx_desc,
+ struct sk_buff *skb)
+{
+ struct igb_rx_buffer *rx_buffer;
+ struct page *page;
+
+ rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
+
+ page = rx_buffer->page;
+ prefetchw(page);
+
+ if (likely(!skb)) {
+ void *page_addr = page_address(page) +
+ rx_buffer->page_offset;
+
+ /* prefetch first cache line of first page */
+ prefetch(page_addr);
+#if L1_CACHE_BYTES < 128
+ prefetch(page_addr + L1_CACHE_BYTES);
+#endif
+
+ /* allocate a skb to store the frags */
+ skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
+ IGB_RX_HDR_LEN);
+ if (unlikely(!skb)) {
+ rx_ring->rx_stats.alloc_failed++;
+ return NULL;
+ }
+
+ /*
+ * we will be copying header into skb->data in
+ * pskb_may_pull so it is in our interest to prefetch
+ * it now to avoid a possible cache miss
+ */
+ prefetchw(skb->data);
+ }
+
+ /* we are reusing so sync this buffer for CPU use */
+ dma_sync_single_range_for_cpu(rx_ring->dev,
+ rx_buffer->dma,
+ rx_buffer->page_offset,
+ IGB_RX_BUFSZ,
+ DMA_FROM_DEVICE);
+
+ /* pull page into skb */
+ if (igb_add_rx_frag(rx_ring, rx_buffer, rx_desc, skb)) {
+ /* hand second half of page back to the ring */
+ igb_reuse_rx_page(rx_ring, rx_buffer);
+ } else {
+ /* we are not reusing the buffer so unmap it */
+ dma_unmap_page(rx_ring->dev, rx_buffer->dma,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+ }
+
+ /* clear contents of rx_buffer */
+ rx_buffer->page = NULL;
+
+ return skb;
+}
+
+#endif
+static inline void igb_rx_checksum(struct igb_ring *ring,
+ union e1000_adv_rx_desc *rx_desc,
+ struct sk_buff *skb)
+{
+ skb_checksum_none_assert(skb);
+
+ /* Ignore Checksum bit is set */
+ if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM))
+ return;
+
+ /* Rx checksum disabled via ethtool */
+ if (!(netdev_ring(ring)->features & NETIF_F_RXCSUM))
+ return;
+
+ /* TCP/UDP checksum error bit is set */
+ if (igb_test_staterr(rx_desc,
+ E1000_RXDEXT_STATERR_TCPE |
+ E1000_RXDEXT_STATERR_IPE)) {
+ /*
+ * work around errata with sctp packets where the TCPE aka
+ * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
+ * packets, (aka let the stack check the crc32c)
+ */
+ if (!((skb->len == 60) &&
+ test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags)))
+ ring->rx_stats.csum_err++;
+
+ /* let the stack verify checksum errors */
+ return;
+ }
+ /* It must be a TCP or UDP packet with a valid checksum */
+ if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS |
+ E1000_RXD_STAT_UDPCS))
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+}
+
+#ifdef NETIF_F_RXHASH
+static inline void igb_rx_hash(struct igb_ring *ring,
+ union e1000_adv_rx_desc *rx_desc,
+ struct sk_buff *skb)
+{
+ if (netdev_ring(ring)->features & NETIF_F_RXHASH)
+ skb_set_hash(skb, le32_to_cpu(rx_desc->wb.lower.hi_dword.rss),
+ PKT_HASH_TYPE_L3);
+}
+
+#endif
+#ifndef IGB_NO_LRO
+#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
+/**
+ * igb_merge_active_tail - merge active tail into lro skb
+ * @tail: pointer to active tail in frag_list
+ *
+ * This function merges the length and data of an active tail into the
+ * skb containing the frag_list. It resets the tail's pointer to the head,
+ * but it leaves the heads pointer to tail intact.
+ **/
+static inline struct sk_buff *igb_merge_active_tail(struct sk_buff *tail)
+{
+ struct sk_buff *head = IGB_CB(tail)->head;
+
+ if (!head)
+ return tail;
+
+ head->len += tail->len;
+ head->data_len += tail->len;
+ head->truesize += tail->len;
+
+ IGB_CB(tail)->head = NULL;
+
+ return head;
+}
+
+/**
+ * igb_add_active_tail - adds an active tail into the skb frag_list
+ * @head: pointer to the start of the skb
+ * @tail: pointer to active tail to add to frag_list
+ *
+ * This function adds an active tail to the end of the frag list. This tail
+ * will still be receiving data so we cannot yet ad it's stats to the main
+ * skb. That is done via igb_merge_active_tail.
+ **/
+static inline void igb_add_active_tail(struct sk_buff *head, struct sk_buff *tail)
+{
+ struct sk_buff *old_tail = IGB_CB(head)->tail;
+
+ if (old_tail) {
+ igb_merge_active_tail(old_tail);
+ old_tail->next = tail;
+ } else {
+ skb_shinfo(head)->frag_list = tail;
+ }
+
+ IGB_CB(tail)->head = head;
+ IGB_CB(head)->tail = tail;
+
+ IGB_CB(head)->append_cnt++;
+}
+
+/**
+ * igb_close_active_frag_list - cleanup pointers on a frag_list skb
+ * @head: pointer to head of an active frag list
+ *
+ * This function will clear the frag_tail_tracker pointer on an active
+ * frag_list and returns true if the pointer was actually set
+ **/
+static inline bool igb_close_active_frag_list(struct sk_buff *head)
+{
+ struct sk_buff *tail = IGB_CB(head)->tail;
+
+ if (!tail)
+ return false;
+
+ igb_merge_active_tail(tail);
+
+ IGB_CB(head)->tail = NULL;
+
+ return true;
+}
+
+#endif /* CONFIG_IGB_DISABLE_PACKET_SPLIT */
+/**
+ * igb_can_lro - returns true if packet is TCP/IPV4 and LRO is enabled
+ * @adapter: board private structure
+ * @rx_desc: pointer to the rx descriptor
+ * @skb: pointer to the skb to be merged
+ *
+ **/
+static inline bool igb_can_lro(struct igb_ring *rx_ring,
+ union e1000_adv_rx_desc *rx_desc,
+ struct sk_buff *skb)
+{
+ struct iphdr *iph = (struct iphdr *)skb->data;
+ __le16 pkt_info = rx_desc->wb.lower.lo_dword.hs_rss.pkt_info;
+
+ /* verify hardware indicates this is IPv4/TCP */
+ if((!(pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_TCP)) ||
+ !(pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_IPV4))))
+ return false;
+
+ /* .. and LRO is enabled */
+ if (!(netdev_ring(rx_ring)->features & NETIF_F_LRO))
+ return false;
+
+ /* .. and we are not in promiscuous mode */
+ if (netdev_ring(rx_ring)->flags & IFF_PROMISC)
+ return false;
+
+ /* .. and the header is large enough for us to read IP/TCP fields */
+ if (!pskb_may_pull(skb, sizeof(struct igb_lrohdr)))
+ return false;
+
+ /* .. and there are no VLANs on packet */
+ if (skb->protocol != __constant_htons(ETH_P_IP))
+ return false;
+
+ /* .. and we are version 4 with no options */
+ if (*(u8 *)iph != 0x45)
+ return false;
+
+ /* .. and the packet is not fragmented */
+ if (iph->frag_off & htons(IP_MF | IP_OFFSET))
+ return false;
+
+ /* .. and that next header is TCP */
+ if (iph->protocol != IPPROTO_TCP)
+ return false;
+
+ return true;
+}
+
+static inline struct igb_lrohdr *igb_lro_hdr(struct sk_buff *skb)
+{
+ return (struct igb_lrohdr *)skb->data;
+}
+
+/**
+ * igb_lro_flush - Indicate packets to upper layer.
+ *
+ * Update IP and TCP header part of head skb if more than one
+ * skb's chained and indicate packets to upper layer.
+ **/
+static void igb_lro_flush(struct igb_q_vector *q_vector,
+ struct sk_buff *skb)
+{
+ struct igb_lro_list *lrolist = &q_vector->lrolist;
+
+ __skb_unlink(skb, &lrolist->active);
+
+ if (IGB_CB(skb)->append_cnt) {
+ struct igb_lrohdr *lroh = igb_lro_hdr(skb);
+
+#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
+ /* close any active lro contexts */
+ igb_close_active_frag_list(skb);
+
+#endif
+ /* incorporate ip header and re-calculate checksum */
+ lroh->iph.tot_len = ntohs(skb->len);
+ lroh->iph.check = 0;
+
+ /* header length is 5 since we know no options exist */
+ lroh->iph.check = ip_fast_csum((u8 *)lroh, 5);
+
+ /* clear TCP checksum to indicate we are an LRO frame */
+ lroh->th.check = 0;
+
+ /* incorporate latest timestamp into the tcp header */
+ if (IGB_CB(skb)->tsecr) {
+ lroh->ts[2] = IGB_CB(skb)->tsecr;
+ lroh->ts[1] = htonl(IGB_CB(skb)->tsval);
+ }
+#ifdef NETIF_F_GSO
+
+ skb_shinfo(skb)->gso_size = IGB_CB(skb)->mss;
+ skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
+#endif
+ }
+
+#ifdef HAVE_VLAN_RX_REGISTER
+ igb_receive_skb(q_vector, skb);
+#else
+ napi_gro_receive(&q_vector->napi, skb);
+#endif
+ lrolist->stats.flushed++;
+}
+
+static void igb_lro_flush_all(struct igb_q_vector *q_vector)
+{
+ struct igb_lro_list *lrolist = &q_vector->lrolist;
+ struct sk_buff *skb, *tmp;
+
+ skb_queue_reverse_walk_safe(&lrolist->active, skb, tmp)
+ igb_lro_flush(q_vector, skb);
+}
+
+/*
+ * igb_lro_header_ok - Main LRO function.
+ **/
+static void igb_lro_header_ok(struct sk_buff *skb)
+{
+ struct igb_lrohdr *lroh = igb_lro_hdr(skb);
+ u16 opt_bytes, data_len;
+
+#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
+ IGB_CB(skb)->tail = NULL;
+#endif
+ IGB_CB(skb)->tsecr = 0;
+ IGB_CB(skb)->append_cnt = 0;
+ IGB_CB(skb)->mss = 0;
+
+ /* ensure that the checksum is valid */
+ if (skb->ip_summed != CHECKSUM_UNNECESSARY)
+ return;
+
+ /* If we see CE codepoint in IP header, packet is not mergeable */
+ if (INET_ECN_is_ce(ipv4_get_dsfield(&lroh->iph)))
+ return;
+
+ /* ensure no bits set besides ack or psh */
+ if (lroh->th.fin || lroh->th.syn || lroh->th.rst ||
+ lroh->th.urg || lroh->th.ece || lroh->th.cwr ||
+ !lroh->th.ack)
+ return;
+
+ /* store the total packet length */
+ data_len = ntohs(lroh->iph.tot_len);
+
+ /* remove any padding from the end of the skb */
+ __pskb_trim(skb, data_len);
+
+ /* remove header length from data length */
+ data_len -= sizeof(struct igb_lrohdr);
+
+ /*
+ * check for timestamps. Since the only option we handle are timestamps,
+ * we only have to handle the simple case of aligned timestamps
+ */
+ opt_bytes = (lroh->th.doff << 2) - sizeof(struct tcphdr);
+ if (opt_bytes != 0) {
+ if ((opt_bytes != TCPOLEN_TSTAMP_ALIGNED) ||
+ !pskb_may_pull(skb, sizeof(struct igb_lrohdr) +
+ TCPOLEN_TSTAMP_ALIGNED) ||
+ (lroh->ts[0] != htonl((TCPOPT_NOP << 24) |
+ (TCPOPT_NOP << 16) |
+ (TCPOPT_TIMESTAMP << 8) |
+ TCPOLEN_TIMESTAMP)) ||
+ (lroh->ts[2] == 0)) {
+ return;
+ }
+
+ IGB_CB(skb)->tsval = ntohl(lroh->ts[1]);
+ IGB_CB(skb)->tsecr = lroh->ts[2];
+
+ data_len -= TCPOLEN_TSTAMP_ALIGNED;
+ }
+
+ /* record data_len as mss for the packet */
+ IGB_CB(skb)->mss = data_len;
+ IGB_CB(skb)->next_seq = ntohl(lroh->th.seq);
+}
+
+#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
+static void igb_merge_frags(struct sk_buff *lro_skb, struct sk_buff *new_skb)
+{
+ struct skb_shared_info *sh_info;
+ struct skb_shared_info *new_skb_info;
+ unsigned int data_len;
+
+ sh_info = skb_shinfo(lro_skb);
+ new_skb_info = skb_shinfo(new_skb);
+
+ /* copy frags into the last skb */
+ memcpy(sh_info->frags + sh_info->nr_frags,
+ new_skb_info->frags,
+ new_skb_info->nr_frags * sizeof(skb_frag_t));
+
+ /* copy size data over */
+ sh_info->nr_frags += new_skb_info->nr_frags;
+ data_len = IGB_CB(new_skb)->mss;
+ lro_skb->len += data_len;
+ lro_skb->data_len += data_len;
+ lro_skb->truesize += data_len;
+
+ /* wipe record of data from new_skb */
+ new_skb_info->nr_frags = 0;
+ new_skb->len = new_skb->data_len = 0;
+ dev_kfree_skb_any(new_skb);
+}
+
+#endif /* CONFIG_IGB_DISABLE_PACKET_SPLIT */
+/**
+ * igb_lro_receive - if able, queue skb into lro chain
+ * @q_vector: structure containing interrupt and ring information
+ * @new_skb: pointer to current skb being checked
+ *
+ * Checks whether the skb given is eligible for LRO and if that's
+ * fine chains it to the existing lro_skb based on flowid. If an LRO for
+ * the flow doesn't exist create one.
+ **/
+static void igb_lro_receive(struct igb_q_vector *q_vector,
+ struct sk_buff *new_skb)
+{
+ struct sk_buff *lro_skb;
+ struct igb_lro_list *lrolist = &q_vector->lrolist;
+ struct igb_lrohdr *lroh = igb_lro_hdr(new_skb);
+ __be32 saddr = lroh->iph.saddr;
+ __be32 daddr = lroh->iph.daddr;
+ __be32 tcp_ports = *(__be32 *)&lroh->th;
+ u16 data_len;
+#ifdef HAVE_VLAN_RX_REGISTER
+ u16 vid = IGB_CB(new_skb)->vid;
+#else
+ u16 vid = new_skb->vlan_tci;
+#endif
+
+ igb_lro_header_ok(new_skb);
+
+ /*
+ * we have a packet that might be eligible for LRO,
+ * so see if it matches anything we might expect
+ */
+ skb_queue_walk(&lrolist->active, lro_skb) {
+ if (*(__be32 *)&igb_lro_hdr(lro_skb)->th != tcp_ports ||
+ igb_lro_hdr(lro_skb)->iph.saddr != saddr ||
+ igb_lro_hdr(lro_skb)->iph.daddr != daddr)
+ continue;
+
+#ifdef HAVE_VLAN_RX_REGISTER
+ if (IGB_CB(lro_skb)->vid != vid)
+#else
+ if (lro_skb->vlan_tci != vid)
+#endif
+ continue;
+
+ /* out of order packet */
+ if (IGB_CB(lro_skb)->next_seq != IGB_CB(new_skb)->next_seq) {
+ igb_lro_flush(q_vector, lro_skb);
+ IGB_CB(new_skb)->mss = 0;
+ break;
+ }
+
+ /* TCP timestamp options have changed */
+ if (!IGB_CB(lro_skb)->tsecr != !IGB_CB(new_skb)->tsecr) {
+ igb_lro_flush(q_vector, lro_skb);
+ break;
+ }
+
+ /* make sure timestamp values are increasing */
+ if (IGB_CB(lro_skb)->tsecr &&
+ IGB_CB(lro_skb)->tsval > IGB_CB(new_skb)->tsval) {
+ igb_lro_flush(q_vector, lro_skb);
+ IGB_CB(new_skb)->mss = 0;
+ break;
+ }
+
+ data_len = IGB_CB(new_skb)->mss;
+
+ /* Check for all of the above below
+ * malformed header
+ * no tcp data
+ * resultant packet would be too large
+ * new skb is larger than our current mss
+ * data would remain in header
+ * we would consume more frags then the sk_buff contains
+ * ack sequence numbers changed
+ * window size has changed
+ */
+ if (data_len == 0 ||
+ data_len > IGB_CB(lro_skb)->mss ||
+ data_len > IGB_CB(lro_skb)->free ||
+#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
+ data_len != new_skb->data_len ||
+ skb_shinfo(new_skb)->nr_frags >=
+ (MAX_SKB_FRAGS - skb_shinfo(lro_skb)->nr_frags) ||
+#endif
+ igb_lro_hdr(lro_skb)->th.ack_seq != lroh->th.ack_seq ||
+ igb_lro_hdr(lro_skb)->th.window != lroh->th.window) {
+ igb_lro_flush(q_vector, lro_skb);
+ break;
+ }
+
+ /* Remove IP and TCP header*/
+ skb_pull(new_skb, new_skb->len - data_len);
+
+ /* update timestamp and timestamp echo response */
+ IGB_CB(lro_skb)->tsval = IGB_CB(new_skb)->tsval;
+ IGB_CB(lro_skb)->tsecr = IGB_CB(new_skb)->tsecr;
+
+ /* update sequence and free space */
+ IGB_CB(lro_skb)->next_seq += data_len;
+ IGB_CB(lro_skb)->free -= data_len;
+
+ /* update append_cnt */
+ IGB_CB(lro_skb)->append_cnt++;
+
+#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
+ /* if header is empty pull pages into current skb */
+ igb_merge_frags(lro_skb, new_skb);
+#else
+ /* chain this new skb in frag_list */
+ igb_add_active_tail(lro_skb, new_skb);
+#endif
+
+ if ((data_len < IGB_CB(lro_skb)->mss) || lroh->th.psh ||
+ skb_shinfo(lro_skb)->nr_frags == MAX_SKB_FRAGS) {
+ igb_lro_hdr(lro_skb)->th.psh |= lroh->th.psh;
+ igb_lro_flush(q_vector, lro_skb);
+ }
+
+ lrolist->stats.coal++;
+ return;
+ }
+
+ if (IGB_CB(new_skb)->mss && !lroh->th.psh) {
+ /* if we are at capacity flush the tail */
+ if (skb_queue_len(&lrolist->active) >= IGB_LRO_MAX) {
+ lro_skb = skb_peek_tail(&lrolist->active);
+ if (lro_skb)
+ igb_lro_flush(q_vector, lro_skb);
+ }
+
+ /* update sequence and free space */
+ IGB_CB(new_skb)->next_seq += IGB_CB(new_skb)->mss;
+ IGB_CB(new_skb)->free = 65521 - new_skb->len;
+
+ /* .. and insert at the front of the active list */
+ __skb_queue_head(&lrolist->active, new_skb);
+
+ lrolist->stats.coal++;
+ return;
+ }
+
+ /* packet not handled by any of the above, pass it to the stack */
+#ifdef HAVE_VLAN_RX_REGISTER
+ igb_receive_skb(q_vector, new_skb);
+#else
+ napi_gro_receive(&q_vector->napi, new_skb);
+#endif
+}
+
+#endif /* IGB_NO_LRO */
+/**
+ * igb_process_skb_fields - Populate skb header fields from Rx descriptor
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @rx_desc: pointer to the EOP Rx descriptor
+ * @skb: pointer to current skb being populated
+ *
+ * This function checks the ring, descriptor, and packet information in
+ * order to populate the hash, checksum, VLAN, timestamp, protocol, and
+ * other fields within the skb.
+ **/
+static void igb_process_skb_fields(struct igb_ring *rx_ring,
+ union e1000_adv_rx_desc *rx_desc,
+ struct sk_buff *skb)
+{
+ struct net_device *dev = rx_ring->netdev;
+ __le16 pkt_info = rx_desc->wb.lower.lo_dword.hs_rss.pkt_info;
+
+#ifdef NETIF_F_RXHASH
+ igb_rx_hash(rx_ring, rx_desc, skb);
+
+#endif
+ igb_rx_checksum(rx_ring, rx_desc, skb);
+
+ /* update packet type stats */
+ if (pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_IPV4))
+ rx_ring->rx_stats.ipv4_packets++;
+ else if (pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_IPV4_EX))
+ rx_ring->rx_stats.ipv4e_packets++;
+ else if (pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_IPV6))
+ rx_ring->rx_stats.ipv6_packets++;
+ else if (pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_IPV6_EX))
+ rx_ring->rx_stats.ipv6e_packets++;
+ else if (pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_TCP))
+ rx_ring->rx_stats.tcp_packets++;
+ else if (pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_UDP))
+ rx_ring->rx_stats.udp_packets++;
+ else if (pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_SCTP))
+ rx_ring->rx_stats.sctp_packets++;
+ else if (pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_NFS))
+ rx_ring->rx_stats.nfs_packets++;
+
+#ifdef HAVE_PTP_1588_CLOCK
+ igb_ptp_rx_hwtstamp(rx_ring, rx_desc, skb);
+#endif /* HAVE_PTP_1588_CLOCK */
+
+#ifdef NETIF_F_HW_VLAN_CTAG_RX
+ if ((dev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
+#else
+ if ((dev->features & NETIF_F_HW_VLAN_RX) &&
+#endif
+ igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) {
+ u16 vid = 0;
+ if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) &&
+ test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &rx_ring->flags))
+ vid = be16_to_cpu(rx_desc->wb.upper.vlan);
+ else
+ vid = le16_to_cpu(rx_desc->wb.upper.vlan);
+#ifdef HAVE_VLAN_RX_REGISTER
+ IGB_CB(skb)->vid = vid;
+ } else {
+ IGB_CB(skb)->vid = 0;
+#else
+
+#ifdef HAVE_VLAN_PROTOCOL
+ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid);
+#else
+ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid);
+#endif
+
+
+#endif
+ }
+
+ skb_record_rx_queue(skb, rx_ring->queue_index);
+
+ skb->protocol = eth_type_trans(skb, dev);
+}
+
+/**
+ * igb_is_non_eop - process handling of non-EOP buffers
+ * @rx_ring: Rx ring being processed
+ * @rx_desc: Rx descriptor for current buffer
+ *
+ * This function updates next to clean. If the buffer is an EOP buffer
+ * this function exits returning false, otherwise it will place the
+ * sk_buff in the next buffer to be chained and return true indicating
+ * that this is in fact a non-EOP buffer.
+ **/
+static bool igb_is_non_eop(struct igb_ring *rx_ring,
+ union e1000_adv_rx_desc *rx_desc)
+{
+ u32 ntc = rx_ring->next_to_clean + 1;
+
+ /* fetch, update, and store next to clean */
+ ntc = (ntc < rx_ring->count) ? ntc : 0;
+ rx_ring->next_to_clean = ntc;
+
+ prefetch(IGB_RX_DESC(rx_ring, ntc));
+
+ if (likely(igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP)))
+ return false;
+
+ return true;
+}
+
+#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
+/* igb_clean_rx_irq -- * legacy */
+static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
+{
+ struct igb_ring *rx_ring = q_vector->rx.ring;
+ unsigned int total_bytes = 0, total_packets = 0;
+ u16 cleaned_count = igb_desc_unused(rx_ring);
+
+ do {
+ struct igb_rx_buffer *rx_buffer;
+ union e1000_adv_rx_desc *rx_desc;
+ struct sk_buff *skb;
+ u16 ntc;
+
+ /* return some buffers to hardware, one at a time is too slow */
+ if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
+ igb_alloc_rx_buffers(rx_ring, cleaned_count);
+ cleaned_count = 0;
+ }
+
+ ntc = rx_ring->next_to_clean;
+ rx_desc = IGB_RX_DESC(rx_ring, ntc);
+ rx_buffer = &rx_ring->rx_buffer_info[ntc];
+
+ if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_DD))
+ break;
+
+ /*
+ * This memory barrier is needed to keep us from reading
+ * any other fields out of the rx_desc until we know the
+ * RXD_STAT_DD bit is set
+ */
+ rmb();
+
+ skb = rx_buffer->skb;
+
+ prefetch(skb->data);
+
+ /* pull the header of the skb in */
+ __skb_put(skb, le16_to_cpu(rx_desc->wb.upper.length));
+
+ /* clear skb reference in buffer info structure */
+ rx_buffer->skb = NULL;
+
+ cleaned_count++;
+
+ BUG_ON(igb_is_non_eop(rx_ring, rx_desc));
+
+ dma_unmap_single(rx_ring->dev, rx_buffer->dma,
+ rx_ring->rx_buffer_len,
+ DMA_FROM_DEVICE);
+ rx_buffer->dma = 0;
+
+ if (igb_test_staterr(rx_desc,
+ E1000_RXDEXT_ERR_FRAME_ERR_MASK)) {
+ dev_kfree_skb_any(skb);
+ continue;
+ }
+
+ total_bytes += skb->len;
+
+ /* populate checksum, timestamp, VLAN, and protocol */
+ igb_process_skb_fields(rx_ring, rx_desc, skb);
+
+#ifndef IGB_NO_LRO
+ if (igb_can_lro(rx_ring, rx_desc, skb))
+ igb_lro_receive(q_vector, skb);
+ else
+#endif
+#ifdef HAVE_VLAN_RX_REGISTER
+ igb_receive_skb(q_vector, skb);
+#else
+ napi_gro_receive(&q_vector->napi, skb);
+#endif
+
+#ifndef NETIF_F_GRO
+ netdev_ring(rx_ring)->last_rx = jiffies;
+
+#endif
+ /* update budget accounting */
+ total_packets++;
+ } while (likely(total_packets < budget));
+
+ rx_ring->rx_stats.packets += total_packets;
+ rx_ring->rx_stats.bytes += total_bytes;
+ q_vector->rx.total_packets += total_packets;
+ q_vector->rx.total_bytes += total_bytes;
+
+ if (cleaned_count)
+ igb_alloc_rx_buffers(rx_ring, cleaned_count);
+
+#ifndef IGB_NO_LRO
+ igb_lro_flush_all(q_vector);
+
+#endif /* IGB_NO_LRO */
+ return total_packets < budget;
+}
+#else /* CONFIG_IGB_DISABLE_PACKET_SPLIT */
+/**
+ * igb_get_headlen - determine size of header for LRO/GRO
+ * @data: pointer to the start of the headers
+ * @max_len: total length of section to find headers in
+ *
+ * This function is meant to determine the length of headers that will
+ * be recognized by hardware for LRO, and GRO offloads. The main
+ * motivation of doing this is to only perform one pull for IPv4 TCP
+ * packets so that we can do basic things like calculating the gso_size
+ * based on the average data per packet.
+ **/
+static unsigned int igb_get_headlen(unsigned char *data,
+ unsigned int max_len)
+{
+ union {
+ unsigned char *network;
+ /* l2 headers */
+ struct ethhdr *eth;
+ struct vlan_hdr *vlan;
+ /* l3 headers */
+ struct iphdr *ipv4;
+ struct ipv6hdr *ipv6;
+ } hdr;
+ __be16 protocol;
+ u8 nexthdr = 0; /* default to not TCP */
+ u8 hlen;
+
+ /* this should never happen, but better safe than sorry */
+ if (max_len < ETH_HLEN)
+ return max_len;
+
+ /* initialize network frame pointer */
+ hdr.network = data;
+
+ /* set first protocol and move network header forward */
+ protocol = hdr.eth->h_proto;
+ hdr.network += ETH_HLEN;
+
+ /* handle any vlan tag if present */
+ if (protocol == __constant_htons(ETH_P_8021Q)) {
+ if ((hdr.network - data) > (max_len - VLAN_HLEN))
+ return max_len;
+
+ protocol = hdr.vlan->h_vlan_encapsulated_proto;
+ hdr.network += VLAN_HLEN;
+ }
+
+ /* handle L3 protocols */
+ if (protocol == __constant_htons(ETH_P_IP)) {
+ if ((hdr.network - data) > (max_len - sizeof(struct iphdr)))
+ return max_len;
+
+ /* access ihl as a u8 to avoid unaligned access on ia64 */
+ hlen = (hdr.network[0] & 0x0F) << 2;
+
+ /* verify hlen meets minimum size requirements */
+ if (hlen < sizeof(struct iphdr))
+ return hdr.network - data;
+
+ /* record next protocol if header is present */
+ if (!(hdr.ipv4->frag_off & htons(IP_OFFSET)))
+ nexthdr = hdr.ipv4->protocol;
+#ifdef NETIF_F_TSO6
+ } else if (protocol == __constant_htons(ETH_P_IPV6)) {
+ if ((hdr.network - data) > (max_len - sizeof(struct ipv6hdr)))
+ return max_len;
+
+ /* record next protocol */
+ nexthdr = hdr.ipv6->nexthdr;
+ hlen = sizeof(struct ipv6hdr);
+#endif /* NETIF_F_TSO6 */
+ } else {
+ return hdr.network - data;
+ }
+
+ /* relocate pointer to start of L4 header */
+ hdr.network += hlen;
+
+ /* finally sort out TCP */
+ if (nexthdr == IPPROTO_TCP) {
+ if ((hdr.network - data) > (max_len - sizeof(struct tcphdr)))
+ return max_len;
+
+ /* access doff as a u8 to avoid unaligned access on ia64 */
+ hlen = (hdr.network[12] & 0xF0) >> 2;
+
+ /* verify hlen meets minimum size requirements */
+ if (hlen < sizeof(struct tcphdr))
+ return hdr.network - data;
+
+ hdr.network += hlen;
+ } else if (nexthdr == IPPROTO_UDP) {
+ if ((hdr.network - data) > (max_len - sizeof(struct udphdr)))
+ return max_len;
+
+ hdr.network += sizeof(struct udphdr);
+ }
+
+ /*
+ * If everything has gone correctly hdr.network should be the
+ * data section of the packet and will be the end of the header.
+ * If not then it probably represents the end of the last recognized
+ * header.
+ */
+ if ((hdr.network - data) < max_len)
+ return hdr.network - data;
+ else
+ return max_len;
+}
+
+/**
+ * igb_pull_tail - igb specific version of skb_pull_tail
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @rx_desc: pointer to the EOP Rx descriptor
+ * @skb: pointer to current skb being adjusted
+ *
+ * This function is an igb specific version of __pskb_pull_tail. The
+ * main difference between this version and the original function is that
+ * this function can make several assumptions about the state of things
+ * that allow for significant optimizations versus the standard function.
+ * As a result we can do things like drop a frag and maintain an accurate
+ * truesize for the skb.
+ */
+static void igb_pull_tail(struct igb_ring *rx_ring,
+ union e1000_adv_rx_desc *rx_desc,
+ struct sk_buff *skb)
+{
+ struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
+ unsigned char *va;
+ unsigned int pull_len;
+
+ /*
+ * it is valid to use page_address instead of kmap since we are
+ * working with pages allocated out of the lomem pool per
+ * alloc_page(GFP_ATOMIC)
+ */
+ va = skb_frag_address(frag);
+
+#ifdef HAVE_PTP_1588_CLOCK
+ if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
+ /* retrieve timestamp from buffer */
+ igb_ptp_rx_pktstamp(rx_ring->q_vector, va, skb);
+
+ /* update pointers to remove timestamp header */
+ skb_frag_size_sub(frag, IGB_TS_HDR_LEN);
+ frag->page_offset += IGB_TS_HDR_LEN;
+ skb->data_len -= IGB_TS_HDR_LEN;
+ skb->len -= IGB_TS_HDR_LEN;
+
+ /* move va to start of packet data */
+ va += IGB_TS_HDR_LEN;
+ }
+#endif /* HAVE_PTP_1588_CLOCK */
+
+ /*
+ * we need the header to contain the greater of either ETH_HLEN or
+ * 60 bytes if the skb->len is less than 60 for skb_pad.
+ */
+ pull_len = igb_get_headlen(va, IGB_RX_HDR_LEN);
+
+ /* align pull length to size of long to optimize memcpy performance */
+ skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long)));
+
+ /* update all of the pointers */
+ skb_frag_size_sub(frag, pull_len);
+ frag->page_offset += pull_len;
+ skb->data_len -= pull_len;
+ skb->tail += pull_len;
+}
+
+/**
+ * igb_cleanup_headers - Correct corrupted or empty headers
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @rx_desc: pointer to the EOP Rx descriptor
+ * @skb: pointer to current skb being fixed
+ *
+ * Address the case where we are pulling data in on pages only
+ * and as such no data is present in the skb header.
+ *
+ * In addition if skb is not at least 60 bytes we need to pad it so that
+ * it is large enough to qualify as a valid Ethernet frame.
+ *
+ * Returns true if an error was encountered and skb was freed.
+ **/
+static bool igb_cleanup_headers(struct igb_ring *rx_ring,
+ union e1000_adv_rx_desc *rx_desc,
+ struct sk_buff *skb)
+{
+
+ if (unlikely((igb_test_staterr(rx_desc,
+ E1000_RXDEXT_ERR_FRAME_ERR_MASK)))) {
+ struct net_device *netdev = rx_ring->netdev;
+ if (!(netdev->features & NETIF_F_RXALL)) {
+ dev_kfree_skb_any(skb);
+ return true;
+ }
+ }
+
+ /* place header in linear portion of buffer */
+ if (skb_is_nonlinear(skb))
+ igb_pull_tail(rx_ring, rx_desc, skb);
+
+ /* if skb_pad returns an error the skb was freed */
+ if (unlikely(skb->len < 60)) {
+ int pad_len = 60 - skb->len;
+
+ if (skb_pad(skb, pad_len))
+ return true;
+ __skb_put(skb, pad_len);
+ }
+
+ return false;
+}
+
+/* igb_clean_rx_irq -- * packet split */
+static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
+{
+ struct igb_ring *rx_ring = q_vector->rx.ring;
+ struct sk_buff *skb = rx_ring->skb;
+ unsigned int total_bytes = 0, total_packets = 0;
+ u16 cleaned_count = igb_desc_unused(rx_ring);
+
+ do {
+ union e1000_adv_rx_desc *rx_desc;
+
+ /* return some buffers to hardware, one at a time is too slow */
+ if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
+ igb_alloc_rx_buffers(rx_ring, cleaned_count);
+ cleaned_count = 0;
+ }
+
+ rx_desc = IGB_RX_DESC(rx_ring, rx_ring->next_to_clean);
+
+ if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_DD))
+ break;
+
+ /*
+ * This memory barrier is needed to keep us from reading
+ * any other fields out of the rx_desc until we know the
+ * RXD_STAT_DD bit is set
+ */
+ rmb();
+
+ /* retrieve a buffer from the ring */
+ skb = igb_fetch_rx_buffer(rx_ring, rx_desc, skb);
+
+ /* exit if we failed to retrieve a buffer */
+ if (!skb)
+ break;
+
+ cleaned_count++;
+
+ /* fetch next buffer in frame if non-eop */
+ if (igb_is_non_eop(rx_ring, rx_desc))
+ continue;
+
+ /* verify the packet layout is correct */
+ if (igb_cleanup_headers(rx_ring, rx_desc, skb)) {
+ skb = NULL;
+ continue;
+ }
+
+ /* probably a little skewed due to removing CRC */
+ total_bytes += skb->len;
+
+ /* populate checksum, timestamp, VLAN, and protocol */
+ igb_process_skb_fields(rx_ring, rx_desc, skb);
+
+#ifndef IGB_NO_LRO
+ if (igb_can_lro(rx_ring, rx_desc, skb))
+ igb_lro_receive(q_vector, skb);
+ else
+#endif
+#ifdef HAVE_VLAN_RX_REGISTER
+ igb_receive_skb(q_vector, skb);
+#else
+ napi_gro_receive(&q_vector->napi, skb);
+#endif
+#ifndef NETIF_F_GRO
+
+ netdev_ring(rx_ring)->last_rx = jiffies;
+#endif
+
+ /* reset skb pointer */
+ skb = NULL;
+
+ /* update budget accounting */
+ total_packets++;
+ } while (likely(total_packets < budget));
+
+ /* place incomplete frames back on ring for completion */
+ rx_ring->skb = skb;
+
+ rx_ring->rx_stats.packets += total_packets;
+ rx_ring->rx_stats.bytes += total_bytes;
+ q_vector->rx.total_packets += total_packets;
+ q_vector->rx.total_bytes += total_bytes;
+
+ if (cleaned_count)
+ igb_alloc_rx_buffers(rx_ring, cleaned_count);
+
+#ifndef IGB_NO_LRO
+ igb_lro_flush_all(q_vector);
+
+#endif /* IGB_NO_LRO */
+ return total_packets < budget;
+}
+#endif /* CONFIG_IGB_DISABLE_PACKET_SPLIT */
+
+#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
+static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
+ struct igb_rx_buffer *bi)
+{
+ struct sk_buff *skb = bi->skb;
+ dma_addr_t dma = bi->dma;
+
+ if (dma)
+ return true;
+
+ if (likely(!skb)) {
+ skb = netdev_alloc_skb_ip_align(netdev_ring(rx_ring),
+ rx_ring->rx_buffer_len);
+ bi->skb = skb;
+ if (!skb) {
+ rx_ring->rx_stats.alloc_failed++;
+ return false;
+ }
+
+ /* initialize skb for ring */
+ skb_record_rx_queue(skb, ring_queue_index(rx_ring));
+ }
+
+ dma = dma_map_single(rx_ring->dev, skb->data,
+ rx_ring->rx_buffer_len, DMA_FROM_DEVICE);
+
+ /* if mapping failed free memory back to system since
+ * there isn't much point in holding memory we can't use
+ */
+ if (dma_mapping_error(rx_ring->dev, dma)) {
+ dev_kfree_skb_any(skb);
+ bi->skb = NULL;
+
+ rx_ring->rx_stats.alloc_failed++;
+ return false;
+ }
+
+ bi->dma = dma;
+ return true;
+}
+
+#else /* CONFIG_IGB_DISABLE_PACKET_SPLIT */
+static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
+ struct igb_rx_buffer *bi)
+{
+ struct page *page = bi->page;
+ dma_addr_t dma;
+
+ /* since we are recycling buffers we should seldom need to alloc */
+ if (likely(page))
+ return true;
+
+ /* alloc new page for storage */
+ page = alloc_page(GFP_ATOMIC | __GFP_COLD);
+ if (unlikely(!page)) {
+ rx_ring->rx_stats.alloc_failed++;
+ return false;
+ }
+
+ /* map page for use */
+ dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);
+
+ /*
+ * if mapping failed free memory back to system since
+ * there isn't much point in holding memory we can't use
+ */
+ if (dma_mapping_error(rx_ring->dev, dma)) {
+ __free_page(page);
+
+ rx_ring->rx_stats.alloc_failed++;
+ return false;
+ }
+
+ bi->dma = dma;
+ bi->page = page;
+ bi->page_offset = 0;
+
+ return true;
+}
+
+#endif /* CONFIG_IGB_DISABLE_PACKET_SPLIT */
+/**
+ * igb_alloc_rx_buffers - Replace used receive buffers; packet split
+ * @adapter: address of board private structure
+ **/
+void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
+{
+ union e1000_adv_rx_desc *rx_desc;
+ struct igb_rx_buffer *bi;
+ u16 i = rx_ring->next_to_use;
+
+ /* nothing to do */
+ if (!cleaned_count)
+ return;
+
+ rx_desc = IGB_RX_DESC(rx_ring, i);
+ bi = &rx_ring->rx_buffer_info[i];
+ i -= rx_ring->count;
+
+ do {
+#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
+ if (!igb_alloc_mapped_skb(rx_ring, bi))
+#else
+ if (!igb_alloc_mapped_page(rx_ring, bi))
+#endif /* CONFIG_IGB_DISABLE_PACKET_SPLIT */
+ break;
+
+ /*
+ * Refresh the desc even if buffer_addrs didn't change
+ * because each write-back erases this info.
+ */
+#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
+ rx_desc->read.pkt_addr = cpu_to_le64(bi->dma);
+#else
+ rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);
+#endif
+
+ rx_desc++;
+ bi++;
+ i++;
+ if (unlikely(!i)) {
+ rx_desc = IGB_RX_DESC(rx_ring, 0);
+ bi = rx_ring->rx_buffer_info;
+ i -= rx_ring->count;
+ }
+
+ /* clear the hdr_addr for the next_to_use descriptor */
+ rx_desc->read.hdr_addr = 0;
+
+ cleaned_count--;
+ } while (cleaned_count);
+
+ i += rx_ring->count;
+
+ if (rx_ring->next_to_use != i) {
+ /* record the next descriptor to use */
+ rx_ring->next_to_use = i;
+
+#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
+ /* update next to alloc since we have filled the ring */
+ rx_ring->next_to_alloc = i;
+
+#endif
+ /*
+ * Force memory writes to complete before letting h/w
+ * know there are new descriptors to fetch. (Only
+ * applicable for weak-ordered memory model archs,
+ * such as IA-64).
+ */
+ wmb();
+ writel(i, rx_ring->tail);
+ }
+}
+
+#ifdef SIOCGMIIPHY
+/**
+ * igb_mii_ioctl -
+ * @netdev:
+ * @ifreq:
+ * @cmd:
+ **/
+static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+ struct mii_ioctl_data *data = if_mii(ifr);
+
+ if (adapter->hw.phy.media_type != e1000_media_type_copper)
+ return -EOPNOTSUPP;
+
+ switch (cmd) {
+ case SIOCGMIIPHY:
+ data->phy_id = adapter->hw.phy.addr;
+ break;
+ case SIOCGMIIREG:
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+ if (e1000_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
+ &data->val_out))
+ return -EIO;
+ break;
+ case SIOCSMIIREG:
+ default:
+ return -EOPNOTSUPP;
+ }
+ return E1000_SUCCESS;
+}
+
+#endif
+/**
+ * igb_ioctl -
+ * @netdev:
+ * @ifreq:
+ * @cmd:
+ **/
+static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
+{
+ switch (cmd) {
+#ifdef SIOCGMIIPHY
+ case SIOCGMIIPHY:
+ case SIOCGMIIREG:
+ case SIOCSMIIREG:
+ return igb_mii_ioctl(netdev, ifr, cmd);
+#endif
+#ifdef HAVE_PTP_1588_CLOCK
+ case SIOCSHWTSTAMP:
+ return igb_ptp_hwtstamp_ioctl(netdev, ifr, cmd);
+#endif /* HAVE_PTP_1588_CLOCK */
+#ifdef ETHTOOL_OPS_COMPAT
+ case SIOCETHTOOL:
+ return ethtool_ioctl(ifr);
+#endif
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+s32 e1000_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
+{
+ struct igb_adapter *adapter = hw->back;
+ u16 cap_offset;
+
+ cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
+ if (!cap_offset)
+ return -E1000_ERR_CONFIG;
+
+ pci_read_config_word(adapter->pdev, cap_offset + reg, value);
+
+ return E1000_SUCCESS;
+}
+
+s32 e1000_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
+{
+ struct igb_adapter *adapter = hw->back;
+ u16 cap_offset;
+
+ cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
+ if (!cap_offset)
+ return -E1000_ERR_CONFIG;
+
+ pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
+
+ return E1000_SUCCESS;
+}
+
+#ifdef HAVE_VLAN_RX_REGISTER
+static void igb_vlan_mode(struct net_device *netdev, struct vlan_group *vlgrp)
+#else
+void igb_vlan_mode(struct net_device *netdev, u32 features)
+#endif
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+ struct e1000_hw *hw = &adapter->hw;
+ u32 ctrl, rctl;
+ int i;
+#ifdef HAVE_VLAN_RX_REGISTER
+ bool enable = !!vlgrp;
+
+ igb_irq_disable(adapter);
+
+ adapter->vlgrp = vlgrp;
+
+ if (!test_bit(__IGB_DOWN, &adapter->state))
+ igb_irq_enable(adapter);
+#else
+#ifdef NETIF_F_HW_VLAN_CTAG_RX
+ bool enable = !!(features & NETIF_F_HW_VLAN_CTAG_RX);
+#else
+ bool enable = !!(features & NETIF_F_HW_VLAN_RX);
+#endif
+#endif
+
+ if (enable) {
+ /* enable VLAN tag insert/strip */
+ ctrl = E1000_READ_REG(hw, E1000_CTRL);
+ ctrl |= E1000_CTRL_VME;
+ E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
+
+ /* Disable CFI check */
+ rctl = E1000_READ_REG(hw, E1000_RCTL);
+ rctl &= ~E1000_RCTL_CFIEN;
+ E1000_WRITE_REG(hw, E1000_RCTL, rctl);
+ } else {
+ /* disable VLAN tag insert/strip */
+ ctrl = E1000_READ_REG(hw, E1000_CTRL);
+ ctrl &= ~E1000_CTRL_VME;
+ E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
+ }
+
+#ifndef CONFIG_IGB_VMDQ_NETDEV
+ for (i = 0; i < adapter->vmdq_pools; i++) {
+ igb_set_vf_vlan_strip(adapter,
+ adapter->vfs_allocated_count + i,
+ enable);
+ }
+
+#else
+ igb_set_vf_vlan_strip(adapter,
+ adapter->vfs_allocated_count,
+ enable);
+
+ for (i = 1; i < adapter->vmdq_pools; i++) {
+#ifdef HAVE_VLAN_RX_REGISTER
+ struct igb_vmdq_adapter *vadapter;
+ vadapter = netdev_priv(adapter->vmdq_netdev[i-1]);
+ enable = !!vadapter->vlgrp;
+#else
+ struct net_device *vnetdev;
+ vnetdev = adapter->vmdq_netdev[i-1];
+#ifdef NETIF_F_HW_VLAN_CTAG_RX
+ enable = !!(vnetdev->features & NETIF_F_HW_VLAN_CTAG_RX);
+#else
+ enable = !!(vnetdev->features & NETIF_F_HW_VLAN_RX);
+#endif
+#endif
+ igb_set_vf_vlan_strip(adapter,
+ adapter->vfs_allocated_count + i,
+ enable);
+ }
+
+#endif
+ igb_rlpml_set(adapter);
+}
+
+#ifdef HAVE_VLAN_PROTOCOL
+static int igb_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
+#elif defined HAVE_INT_NDO_VLAN_RX_ADD_VID
+#ifdef NETIF_F_HW_VLAN_CTAG_RX
+static int igb_vlan_rx_add_vid(struct net_device *netdev,
+ __always_unused __be16 proto, u16 vid)
+#else
+static int igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
+#endif
+#else
+static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
+#endif
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+ int pf_id = adapter->vfs_allocated_count;
+
+ /* attempt to add filter to vlvf array */
+ igb_vlvf_set(adapter, vid, TRUE, pf_id);
+
+ /* add the filter since PF can receive vlans w/o entry in vlvf */
+ igb_vfta_set(adapter, vid, TRUE);
+#ifndef HAVE_NETDEV_VLAN_FEATURES
+
+ /* Copy feature flags from netdev to the vlan netdev for this vid.
+ * This allows things like TSO to bubble down to our vlan device.
+ * There is no need to update netdev for vlan 0 (DCB), since it
+ * wouldn't has v_netdev.
+ */
+ if (adapter->vlgrp) {
+ struct vlan_group *vlgrp = adapter->vlgrp;
+ struct net_device *v_netdev = vlan_group_get_device(vlgrp, vid);
+ if (v_netdev) {
+ v_netdev->features |= netdev->features;
+ vlan_group_set_device(vlgrp, vid, v_netdev);
+ }
+ }
+#endif
+#ifndef HAVE_VLAN_RX_REGISTER
+
+ set_bit(vid, adapter->active_vlans);
+#endif
+#ifdef HAVE_INT_NDO_VLAN_RX_ADD_VID
+ return 0;
+#endif
+}
+
+#ifdef HAVE_VLAN_PROTOCOL
+static int igb_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid)
+#elif defined HAVE_INT_NDO_VLAN_RX_ADD_VID
+#ifdef NETIF_F_HW_VLAN_CTAG_RX
+static int igb_vlan_rx_kill_vid(struct net_device *netdev,
+ __always_unused __be16 proto, u16 vid)
+#else
+static int igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
+#endif
+#else
+static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
+#endif
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+ int pf_id = adapter->vfs_allocated_count;
+ s32 err;
+
+#ifdef HAVE_VLAN_RX_REGISTER
+ igb_irq_disable(adapter);
+
+ vlan_group_set_device(adapter->vlgrp, vid, NULL);
+
+ if (!test_bit(__IGB_DOWN, &adapter->state))
+ igb_irq_enable(adapter);
+
+#endif /* HAVE_VLAN_RX_REGISTER */
+ /* remove vlan from VLVF table array */
+ err = igb_vlvf_set(adapter, vid, FALSE, pf_id);
+
+ /* if vid was not present in VLVF just remove it from table */
+ if (err)
+ igb_vfta_set(adapter, vid, FALSE);
+#ifndef HAVE_VLAN_RX_REGISTER
+
+ clear_bit(vid, adapter->active_vlans);
+#endif
+#ifdef HAVE_INT_NDO_VLAN_RX_ADD_VID
+ return 0;
+#endif
+}
+
+static void igb_restore_vlan(struct igb_adapter *adapter)
+{
+#ifdef HAVE_VLAN_RX_REGISTER
+ igb_vlan_mode(adapter->netdev, adapter->vlgrp);
+
+ if (adapter->vlgrp) {
+ u16 vid;
+ for (vid = 0; vid < VLAN_N_VID; vid++) {
+ if (!vlan_group_get_device(adapter->vlgrp, vid))
+ continue;
+#ifdef NETIF_F_HW_VLAN_CTAG_RX
+ igb_vlan_rx_add_vid(adapter->netdev,
+ htons(ETH_P_8021Q), vid);
+#else
+ igb_vlan_rx_add_vid(adapter->netdev, vid);
+#endif
+ }
+ }
+#else
+ u16 vid;
+
+ igb_vlan_mode(adapter->netdev, adapter->netdev->features);
+
+ for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
+#ifdef NETIF_F_HW_VLAN_CTAG_RX
+ igb_vlan_rx_add_vid(adapter->netdev,
+ htons(ETH_P_8021Q), vid);
+#else
+ igb_vlan_rx_add_vid(adapter->netdev, vid);
+#endif
+#endif
+}
+
+int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
+{
+ struct pci_dev *pdev = adapter->pdev;
+ struct e1000_mac_info *mac = &adapter->hw.mac;
+
+ mac->autoneg = 0;
+
+ /* SerDes device's does not support 10Mbps Full/duplex
+ * and 100Mbps Half duplex
+ */
+ if (adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
+ switch (spddplx) {
+ case SPEED_10 + DUPLEX_HALF:
+ case SPEED_10 + DUPLEX_FULL:
+ case SPEED_100 + DUPLEX_HALF:
+ dev_err(pci_dev_to_dev(pdev),
+ "Unsupported Speed/Duplex configuration\n");
+ return -EINVAL;
+ default:
+ break;
+ }
+ }
+
+ switch (spddplx) {
+ case SPEED_10 + DUPLEX_HALF:
+ mac->forced_speed_duplex = ADVERTISE_10_HALF;
+ break;
+ case SPEED_10 + DUPLEX_FULL:
+ mac->forced_speed_duplex = ADVERTISE_10_FULL;
+ break;
+ case SPEED_100 + DUPLEX_HALF:
+ mac->forced_speed_duplex = ADVERTISE_100_HALF;
+ break;
+ case SPEED_100 + DUPLEX_FULL:
+ mac->forced_speed_duplex = ADVERTISE_100_FULL;
+ break;
+ case SPEED_1000 + DUPLEX_FULL:
+ mac->autoneg = 1;
+ adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
+ break;
+ case SPEED_1000 + DUPLEX_HALF: /* not supported */
+ default:
+ dev_err(pci_dev_to_dev(pdev), "Unsupported Speed/Duplex configuration\n");
+ return -EINVAL;
+ }
+
+ /* clear MDI, MDI(-X) override is only allowed when autoneg enabled */
+ adapter->hw.phy.mdix = AUTO_ALL_MODES;
+
+ return 0;
+}
+
+static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake,
+ bool runtime)
+{
+ struct net_device *netdev = pci_get_drvdata(pdev);
+ struct igb_adapter *adapter = netdev_priv(netdev);
+ struct e1000_hw *hw = &adapter->hw;
+ u32 ctrl, rctl, status;
+ u32 wufc = runtime ? E1000_WUFC_LNKC : adapter->wol;
+#ifdef CONFIG_PM
+ int retval = 0;
+#endif
+
+ netif_device_detach(netdev);
+
+ status = E1000_READ_REG(hw, E1000_STATUS);
+ if (status & E1000_STATUS_LU)
+ wufc &= ~E1000_WUFC_LNKC;
+
+ if (netif_running(netdev))
+ __igb_close(netdev, true);
+
+ igb_clear_interrupt_scheme(adapter);
+
+#ifdef CONFIG_PM
+ retval = pci_save_state(pdev);
+ if (retval)
+ return retval;
+#endif
+
+ if (wufc) {
+ igb_setup_rctl(adapter);
+ igb_set_rx_mode(netdev);
+
+ /* turn on all-multi mode if wake on multicast is enabled */
+ if (wufc & E1000_WUFC_MC) {
+ rctl = E1000_READ_REG(hw, E1000_RCTL);
+ rctl |= E1000_RCTL_MPE;
+ E1000_WRITE_REG(hw, E1000_RCTL, rctl);
+ }
+
+ ctrl = E1000_READ_REG(hw, E1000_CTRL);
+ /* phy power management enable */
+ #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
+ ctrl |= E1000_CTRL_ADVD3WUC;
+ E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
+
+ /* Allow time for pending master requests to run */
+ e1000_disable_pcie_master(hw);
+
+ E1000_WRITE_REG(hw, E1000_WUC, E1000_WUC_PME_EN);
+ E1000_WRITE_REG(hw, E1000_WUFC, wufc);
+ } else {
+ E1000_WRITE_REG(hw, E1000_WUC, 0);
+ E1000_WRITE_REG(hw, E1000_WUFC, 0);
+ }
+
+ *enable_wake = wufc || adapter->en_mng_pt;
+ if (!*enable_wake)
+ igb_power_down_link(adapter);
+ else
+ igb_power_up_link(adapter);
+
+ /* Release control of h/w to f/w. If f/w is AMT enabled, this
+ * would have already happened in close and is redundant. */
+ igb_release_hw_control(adapter);
+
+ pci_disable_device(pdev);
+
+ return 0;
+}
+
+#ifdef CONFIG_PM
+#ifdef HAVE_SYSTEM_SLEEP_PM_OPS
+static int igb_suspend(struct device *dev)
+#else
+static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
+#endif /* HAVE_SYSTEM_SLEEP_PM_OPS */
+{
+#ifdef HAVE_SYSTEM_SLEEP_PM_OPS
+ struct pci_dev *pdev = to_pci_dev(dev);
+#endif /* HAVE_SYSTEM_SLEEP_PM_OPS */
+ int retval;
+ bool wake;
+
+ retval = __igb_shutdown(pdev, &wake, 0);
+ if (retval)
+ return retval;
+
+ if (wake) {
+ pci_prepare_to_sleep(pdev);
+ } else {
+ pci_wake_from_d3(pdev, false);
+ pci_set_power_state(pdev, PCI_D3hot);
+ }
+
+ return 0;
+}
+
+#ifdef HAVE_SYSTEM_SLEEP_PM_OPS
+static int igb_resume(struct device *dev)
+#else
+static int igb_resume(struct pci_dev *pdev)
+#endif /* HAVE_SYSTEM_SLEEP_PM_OPS */
+{
+#ifdef HAVE_SYSTEM_SLEEP_PM_OPS
+ struct pci_dev *pdev = to_pci_dev(dev);
+#endif /* HAVE_SYSTEM_SLEEP_PM_OPS */
+ struct net_device *netdev = pci_get_drvdata(pdev);
+ struct igb_adapter *adapter = netdev_priv(netdev);
+ struct e1000_hw *hw = &adapter->hw;
+ u32 err;
+
+ pci_set_power_state(pdev, PCI_D0);
+ pci_restore_state(pdev);
+ pci_save_state(pdev);
+
+ err = pci_enable_device_mem(pdev);
+ if (err) {
+ dev_err(pci_dev_to_dev(pdev),
+ "igb: Cannot enable PCI device from suspend\n");
+ return err;
+ }
+ pci_set_master(pdev);
+
+ pci_enable_wake(pdev, PCI_D3hot, 0);
+ pci_enable_wake(pdev, PCI_D3cold, 0);
+
+ if (igb_init_interrupt_scheme(adapter, true)) {
+ dev_err(pci_dev_to_dev(pdev), "Unable to allocate memory for queues\n");
+ return -ENOMEM;
+ }
+
+ igb_reset(adapter);
+
+ /* let the f/w know that the h/w is now under the control of the
+ * driver. */
+ igb_get_hw_control(adapter);
+
+ E1000_WRITE_REG(hw, E1000_WUS, ~0);
+
+ if (netdev->flags & IFF_UP) {
+ rtnl_lock();
+ err = __igb_open(netdev, true);
+ rtnl_unlock();
+ if (err)
+ return err;
+ }
+
+ netif_device_attach(netdev);
+
+ return 0;
+}
+
+#ifdef CONFIG_PM_RUNTIME
+#ifdef HAVE_SYSTEM_SLEEP_PM_OPS
+static int igb_runtime_idle(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct net_device *netdev = pci_get_drvdata(pdev);
+ struct igb_adapter *adapter = netdev_priv(netdev);
+
+ if (!igb_has_link(adapter))
+ pm_schedule_suspend(dev, MSEC_PER_SEC * 5);
+
+ return -EBUSY;
+}
+
+static int igb_runtime_suspend(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ int retval;
+ bool wake;
+
+ retval = __igb_shutdown(pdev, &wake, 1);
+ if (retval)
+ return retval;
+
+ if (wake) {
+ pci_prepare_to_sleep(pdev);
+ } else {
+ pci_wake_from_d3(pdev, false);
+ pci_set_power_state(pdev, PCI_D3hot);
+ }
+
+ return 0;
+}
+
+static int igb_runtime_resume(struct device *dev)
+{
+ return igb_resume(dev);
+}
+#endif /* HAVE_SYSTEM_SLEEP_PM_OPS */
+#endif /* CONFIG_PM_RUNTIME */
+#endif /* CONFIG_PM */
+
+#ifdef USE_REBOOT_NOTIFIER
+/* only want to do this for 2.4 kernels? */
+static int igb_notify_reboot(struct notifier_block *nb, unsigned long event,
+ void *p)
+{
+ struct pci_dev *pdev = NULL;
+ bool wake;
+
+ switch (event) {
+ case SYS_DOWN:
+ case SYS_HALT:
+ case SYS_POWER_OFF:
+ while ((pdev = pci_find_device(PCI_ANY_ID, PCI_ANY_ID, pdev))) {
+ if (pci_dev_driver(pdev) == &igb_driver) {
+ __igb_shutdown(pdev, &wake, 0);
+ if (event == SYS_POWER_OFF) {
+ pci_wake_from_d3(pdev, wake);
+ pci_set_power_state(pdev, PCI_D3hot);
+ }
+ }
+ }
+ }
+ return NOTIFY_DONE;
+}
+#else
+static void igb_shutdown(struct pci_dev *pdev)
+{
+ bool wake = false;
+
+ __igb_shutdown(pdev, &wake, 0);
+
+ if (system_state == SYSTEM_POWER_OFF) {
+ pci_wake_from_d3(pdev, wake);
+ pci_set_power_state(pdev, PCI_D3hot);
+ }
+}
+#endif /* USE_REBOOT_NOTIFIER */
+
+#ifdef CONFIG_NET_POLL_CONTROLLER
+/*
+ * Polling 'interrupt' - used by things like netconsole to send skbs
+ * without having to re-enable interrupts. It's not called while
+ * the interrupt routine is executing.
+ */
+static void igb_netpoll(struct net_device *netdev)
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+ struct e1000_hw *hw = &adapter->hw;
+ struct igb_q_vector *q_vector;
+ int i;
+
+ for (i = 0; i < adapter->num_q_vectors; i++) {
+ q_vector = adapter->q_vector[i];
+ if (adapter->msix_entries)
+ E1000_WRITE_REG(hw, E1000_EIMC, q_vector->eims_value);
+ else
+ igb_irq_disable(adapter);
+ napi_schedule(&q_vector->napi);
+ }
+}
+#endif /* CONFIG_NET_POLL_CONTROLLER */
+
+#ifdef HAVE_PCI_ERS
+#define E1000_DEV_ID_82576_VF 0x10CA
+/**
+ * igb_io_error_detected - called when PCI error is detected
+ * @pdev: Pointer to PCI device
+ * @state: The current pci connection state
+ *
+ * This function is called after a PCI bus error affecting
+ * this device has been detected.
+ */
+static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
+ pci_channel_state_t state)
+{
+ struct net_device *netdev = pci_get_drvdata(pdev);
+ struct igb_adapter *adapter = netdev_priv(netdev);
+
+#ifdef CONFIG_PCI_IOV__UNUSED
+ struct pci_dev *bdev, *vfdev;
+ u32 dw0, dw1, dw2, dw3;
+ int vf, pos;
+ u16 req_id, pf_func;
+
+ if (!(adapter->flags & IGB_FLAG_DETECT_BAD_DMA))
+ goto skip_bad_vf_detection;
+
+ bdev = pdev->bus->self;
+ while (bdev && (pci_pcie_type(bdev) != PCI_EXP_TYPE_ROOT_PORT))
+ bdev = bdev->bus->self;
+
+ if (!bdev)
+ goto skip_bad_vf_detection;
+
+ pos = pci_find_ext_capability(bdev, PCI_EXT_CAP_ID_ERR);
+ if (!pos)
+ goto skip_bad_vf_detection;
+
+ pci_read_config_dword(bdev, pos + PCI_ERR_HEADER_LOG, &dw0);
+ pci_read_config_dword(bdev, pos + PCI_ERR_HEADER_LOG + 4, &dw1);
+ pci_read_config_dword(bdev, pos + PCI_ERR_HEADER_LOG + 8, &dw2);
+ pci_read_config_dword(bdev, pos + PCI_ERR_HEADER_LOG + 12, &dw3);
+
+ req_id = dw1 >> 16;
+ /* On the 82576 if bit 7 of the requestor ID is set then it's a VF */
+ if (!(req_id & 0x0080))
+ goto skip_bad_vf_detection;
+
+ pf_func = req_id & 0x01;
+ if ((pf_func & 1) == (pdev->devfn & 1)) {
+
+ vf = (req_id & 0x7F) >> 1;
+ dev_err(pci_dev_to_dev(pdev),
+ "VF %d has caused a PCIe error\n", vf);
+ dev_err(pci_dev_to_dev(pdev),
+ "TLP: dw0: %8.8x\tdw1: %8.8x\tdw2: "
+ "%8.8x\tdw3: %8.8x\n",
+ dw0, dw1, dw2, dw3);
+
+ /* Find the pci device of the offending VF */
+ vfdev = pci_get_device(PCI_VENDOR_ID_INTEL,
+ E1000_DEV_ID_82576_VF, NULL);
+ while (vfdev) {
+ if (vfdev->devfn == (req_id & 0xFF))
+ break;
+ vfdev = pci_get_device(PCI_VENDOR_ID_INTEL,
+ E1000_DEV_ID_82576_VF, vfdev);
+ }
+ /*
+ * There's a slim chance the VF could have been hot plugged,
+ * so if it is no longer present we don't need to issue the
+ * VFLR. Just clean up the AER in that case.
+ */
+ if (vfdev) {
+ dev_err(pci_dev_to_dev(pdev),
+ "Issuing VFLR to VF %d\n", vf);
+ pci_write_config_dword(vfdev, 0xA8, 0x00008000);
+ }
+
+ pci_cleanup_aer_uncorrect_error_status(pdev);
+ }
+
+ /*
+ * Even though the error may have occurred on the other port
+ * we still need to increment the vf error reference count for
+ * both ports because the I/O resume function will be called
+ * for both of them.
+ */
+ adapter->vferr_refcount++;
+
+ return PCI_ERS_RESULT_RECOVERED;
+
+skip_bad_vf_detection:
+#endif /* CONFIG_PCI_IOV */
+
+ netif_device_detach(netdev);
+
+ if (state == pci_channel_io_perm_failure)
+ return PCI_ERS_RESULT_DISCONNECT;
+
+ if (netif_running(netdev))
+ igb_down(adapter);
+ pci_disable_device(pdev);
+
+ /* Request a slot slot reset. */
+ return PCI_ERS_RESULT_NEED_RESET;
+}
+
+/**
+ * igb_io_slot_reset - called after the pci bus has been reset.
+ * @pdev: Pointer to PCI device
+ *
+ * Restart the card from scratch, as if from a cold-boot. Implementation
+ * resembles the first-half of the igb_resume routine.
+ */
+static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
+{
+ struct net_device *netdev = pci_get_drvdata(pdev);
+ struct igb_adapter *adapter = netdev_priv(netdev);
+ struct e1000_hw *hw = &adapter->hw;
+ pci_ers_result_t result;
+
+ if (pci_enable_device_mem(pdev)) {
+ dev_err(pci_dev_to_dev(pdev),
+ "Cannot re-enable PCI device after reset.\n");
+ result = PCI_ERS_RESULT_DISCONNECT;
+ } else {
+ pci_set_master(pdev);
+ pci_restore_state(pdev);
+ pci_save_state(pdev);
+
+ pci_enable_wake(pdev, PCI_D3hot, 0);
+ pci_enable_wake(pdev, PCI_D3cold, 0);
+
+ schedule_work(&adapter->reset_task);
+ E1000_WRITE_REG(hw, E1000_WUS, ~0);
+ result = PCI_ERS_RESULT_RECOVERED;
+ }
+
+ pci_cleanup_aer_uncorrect_error_status(pdev);
+
+ return result;
+}
+
+/**
+ * igb_io_resume - called when traffic can start flowing again.
+ * @pdev: Pointer to PCI device
+ *
+ * This callback is called when the error recovery driver tells us that
+ * its OK to resume normal operation. Implementation resembles the
+ * second-half of the igb_resume routine.
+ */
+static void igb_io_resume(struct pci_dev *pdev)
+{
+ struct net_device *netdev = pci_get_drvdata(pdev);
+ struct igb_adapter *adapter = netdev_priv(netdev);
+
+ if (adapter->vferr_refcount) {
+ dev_info(pci_dev_to_dev(pdev), "Resuming after VF err\n");
+ adapter->vferr_refcount--;
+ return;
+ }
+
+ if (netif_running(netdev)) {
+ if (igb_up(adapter)) {
+ dev_err(pci_dev_to_dev(pdev), "igb_up failed after reset\n");
+ return;
+ }
+ }
+
+ netif_device_attach(netdev);
+
+ /* let the f/w know that the h/w is now under the control of the
+ * driver. */
+ igb_get_hw_control(adapter);
+}
+
+#endif /* HAVE_PCI_ERS */
+
+int igb_add_mac_filter(struct igb_adapter *adapter, u8 *addr, u16 queue)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ int i;
+
+ if (is_zero_ether_addr(addr))
+ return 0;
+
+ for (i = 0; i < hw->mac.rar_entry_count; i++) {
+ if (adapter->mac_table[i].state & IGB_MAC_STATE_IN_USE)
+ continue;
+ adapter->mac_table[i].state = (IGB_MAC_STATE_MODIFIED |
+ IGB_MAC_STATE_IN_USE);
+ memcpy(adapter->mac_table[i].addr, addr, ETH_ALEN);
+ adapter->mac_table[i].queue = queue;
+ igb_sync_mac_table(adapter);
+ return 0;
+ }
+ return -ENOMEM;
+}
+int igb_del_mac_filter(struct igb_adapter *adapter, u8* addr, u16 queue)
+{
+ /* search table for addr, if found, set to 0 and sync */
+ int i;
+ struct e1000_hw *hw = &adapter->hw;
+
+ if (is_zero_ether_addr(addr))
+ return 0;
+ for (i = 0; i < hw->mac.rar_entry_count; i++) {
+ if (ether_addr_equal(addr, adapter->mac_table[i].addr) &&
+ adapter->mac_table[i].queue == queue) {
+ adapter->mac_table[i].state = IGB_MAC_STATE_MODIFIED;
+ memset(adapter->mac_table[i].addr, 0, ETH_ALEN);
+ adapter->mac_table[i].queue = 0;
+ igb_sync_mac_table(adapter);
+ return 0;
+ }
+ }
+ return -ENOMEM;
+}
+static int igb_set_vf_mac(struct igb_adapter *adapter,
+ int vf, unsigned char *mac_addr)
+{
+ igb_del_mac_filter(adapter, adapter->vf_data[vf].vf_mac_addresses, vf);
+ memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
+
+ igb_add_mac_filter(adapter, mac_addr, vf);
+
+ return 0;
+}
+
+#ifdef IFLA_VF_MAX
+static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+ if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
+ return -EINVAL;
+ adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
+ dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
+ dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
+ " change effective.\n");
+ if (test_bit(__IGB_DOWN, &adapter->state)) {
+ dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
+ " but the PF device is not up.\n");
+ dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
+ " attempting to use the VF device.\n");
+ }
+ return igb_set_vf_mac(adapter, vf, mac);
+}
+
+static int igb_link_mbps(int internal_link_speed)
+{
+ switch (internal_link_speed) {
+ case SPEED_100:
+ return 100;
+ case SPEED_1000:
+ return 1000;
+ case SPEED_2500:
+ return 2500;
+ default:
+ return 0;
+ }
+}
+
+static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
+ int link_speed)
+{
+ int rf_dec, rf_int;
+ u32 bcnrc_val;
+
+ if (tx_rate != 0) {
+ /* Calculate the rate factor values to set */
+ rf_int = link_speed / tx_rate;
+ rf_dec = (link_speed - (rf_int * tx_rate));
+ rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
+
+ bcnrc_val = E1000_RTTBCNRC_RS_ENA;
+ bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
+ E1000_RTTBCNRC_RF_INT_MASK);
+ bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
+ } else {
+ bcnrc_val = 0;
+ }
+
+ E1000_WRITE_REG(hw, E1000_RTTDQSEL, vf); /* vf X uses queue X */
+ /*
+ * Set global transmit compensation time to the MMW_SIZE in RTTBCNRM
+ * register. MMW_SIZE=0x014 if 9728-byte jumbo is supported.
+ */
+ E1000_WRITE_REG(hw, E1000_RTTBCNRM(0), 0x14);
+ E1000_WRITE_REG(hw, E1000_RTTBCNRC, bcnrc_val);
+}
+
+static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
+{
+ int actual_link_speed, i;
+ bool reset_rate = false;
+
+ /* VF TX rate limit was not set */
+ if ((adapter->vf_rate_link_speed == 0) ||
+ (adapter->hw.mac.type != e1000_82576))
+ return;
+
+ actual_link_speed = igb_link_mbps(adapter->link_speed);
+ if (actual_link_speed != adapter->vf_rate_link_speed) {
+ reset_rate = true;
+ adapter->vf_rate_link_speed = 0;
+ dev_info(&adapter->pdev->dev,
+ "Link speed has been changed. VF Transmit rate is disabled\n");
+ }
+
+ for (i = 0; i < adapter->vfs_allocated_count; i++) {
+ if (reset_rate)
+ adapter->vf_data[i].tx_rate = 0;
+
+ igb_set_vf_rate_limit(&adapter->hw, i,
+ adapter->vf_data[i].tx_rate, actual_link_speed);
+ }
+}
+
+#ifdef HAVE_VF_MIN_MAX_TXRATE
+static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int min_tx_rate,
+ int tx_rate)
+#else /* HAVE_VF_MIN_MAX_TXRATE */
+static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
+#endif /* HAVE_VF_MIN_MAX_TXRATE */
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+ struct e1000_hw *hw = &adapter->hw;
+ int actual_link_speed;
+
+ if (hw->mac.type != e1000_82576)
+ return -EOPNOTSUPP;
+
+#ifdef HAVE_VF_MIN_MAX_TXRATE
+ if (min_tx_rate)
+ return -EINVAL;
+#endif /* HAVE_VF_MIN_MAX_TXRATE */
+
+ actual_link_speed = igb_link_mbps(adapter->link_speed);
+ if ((vf >= adapter->vfs_allocated_count) ||
+ (!(E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) ||
+ (tx_rate < 0) || (tx_rate > actual_link_speed))
+ return -EINVAL;
+
+ adapter->vf_rate_link_speed = actual_link_speed;
+ adapter->vf_data[vf].tx_rate = (u16)tx_rate;
+ igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
+
+ return 0;
+}
+
+static int igb_ndo_get_vf_config(struct net_device *netdev,
+ int vf, struct ifla_vf_info *ivi)
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+ if (vf >= adapter->vfs_allocated_count)
+ return -EINVAL;
+ ivi->vf = vf;
+ memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
+#ifdef HAVE_VF_MIN_MAX_TXRATE
+ ivi->max_tx_rate = adapter->vf_data[vf].tx_rate;
+ ivi->min_tx_rate = 0;
+#else /* HAVE_VF_MIN_MAX_TXRATE */
+ ivi->tx_rate = adapter->vf_data[vf].tx_rate;
+#endif /* HAVE_VF_MIN_MAX_TXRATE */
+ ivi->vlan = adapter->vf_data[vf].pf_vlan;
+ ivi->qos = adapter->vf_data[vf].pf_qos;
+#ifdef HAVE_VF_SPOOFCHK_CONFIGURE
+ ivi->spoofchk = adapter->vf_data[vf].spoofchk_enabled;
+#endif
+ return 0;
+}
+#endif
+static void igb_vmm_control(struct igb_adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ int count;
+ u32 reg;
+
+ switch (hw->mac.type) {
+ case e1000_82575:
+ default:
+ /* replication is not supported for 82575 */
+ return;
+ case e1000_82576:
+ /* notify HW that the MAC is adding vlan tags */
+ reg = E1000_READ_REG(hw, E1000_DTXCTL);
+ reg |= (E1000_DTXCTL_VLAN_ADDED |
+ E1000_DTXCTL_SPOOF_INT);
+ E1000_WRITE_REG(hw, E1000_DTXCTL, reg);
+ case e1000_82580:
+ /* enable replication vlan tag stripping */
+ reg = E1000_READ_REG(hw, E1000_RPLOLR);
+ reg |= E1000_RPLOLR_STRVLAN;
+ E1000_WRITE_REG(hw, E1000_RPLOLR, reg);
+ case e1000_i350:
+ case e1000_i354:
+ /* none of the above registers are supported by i350 */
+ break;
+ }
+
+ /* Enable Malicious Driver Detection */
+ if ((adapter->vfs_allocated_count) &&
+ (adapter->mdd)) {
+ if (hw->mac.type == e1000_i350)
+ igb_enable_mdd(adapter);
+ }
+
+ /* enable replication and loopback support */
+ count = adapter->vfs_allocated_count || adapter->vmdq_pools;
+ if (adapter->flags & IGB_FLAG_LOOPBACK_ENABLE && count)
+ e1000_vmdq_set_loopback_pf(hw, 1);
+ e1000_vmdq_set_anti_spoofing_pf(hw,
+ adapter->vfs_allocated_count || adapter->vmdq_pools,
+ adapter->vfs_allocated_count);
+ e1000_vmdq_set_replication_pf(hw, adapter->vfs_allocated_count ||
+ adapter->vmdq_pools);
+}
+
+static void igb_init_fw(struct igb_adapter *adapter)
+{
+ struct e1000_fw_drv_info fw_cmd;
+ struct e1000_hw *hw = &adapter->hw;
+ int i;
+ u16 mask;
+
+ if (hw->mac.type == e1000_i210)
+ mask = E1000_SWFW_EEP_SM;
+ else
+ mask = E1000_SWFW_PHY0_SM;
+ /* i211 parts do not support this feature */
+ if (hw->mac.type == e1000_i211)
+ hw->mac.arc_subsystem_valid = false;
+
+ if (!hw->mac.ops.acquire_swfw_sync(hw, mask)) {
+ for (i = 0; i <= FW_MAX_RETRIES; i++) {
+ E1000_WRITE_REG(hw, E1000_FWSTS, E1000_FWSTS_FWRI);
+ fw_cmd.hdr.cmd = FW_CMD_DRV_INFO;
+ fw_cmd.hdr.buf_len = FW_CMD_DRV_INFO_LEN;
+ fw_cmd.hdr.cmd_or_resp.cmd_resv = FW_CMD_RESERVED;
+ fw_cmd.port_num = hw->bus.func;
+ fw_cmd.drv_version = FW_FAMILY_DRV_VER;
+ fw_cmd.hdr.checksum = 0;
+ fw_cmd.hdr.checksum = e1000_calculate_checksum((u8 *)&fw_cmd,
+ (FW_HDR_LEN +
+ fw_cmd.hdr.buf_len));
+ e1000_host_interface_command(hw, (u8*)&fw_cmd,
+ sizeof(fw_cmd));
+ if (fw_cmd.hdr.cmd_or_resp.ret_status == FW_STATUS_SUCCESS)
+ break;
+ }
+ } else
+ dev_warn(pci_dev_to_dev(adapter->pdev),
+ "Unable to get semaphore, firmware init failed.\n");
+ hw->mac.ops.release_swfw_sync(hw, mask);
+}
+
+static void igb_init_dmac(struct igb_adapter *adapter, u32 pba)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ u32 dmac_thr;
+ u16 hwm;
+ u32 status;
+
+ if (hw->mac.type == e1000_i211)
+ return;
+
+ if (hw->mac.type > e1000_82580) {
+ if (adapter->dmac != IGB_DMAC_DISABLE) {
+ u32 reg;
+
+ /* force threshold to 0. */
+ E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);
+
+ /*
+ * DMA Coalescing high water mark needs to be greater
+ * than the Rx threshold. Set hwm to PBA - max frame
+ * size in 16B units, capping it at PBA - 6KB.
+ */
+ hwm = 64 * pba - adapter->max_frame_size / 16;
+ if (hwm < 64 * (pba - 6))
+ hwm = 64 * (pba - 6);
+ reg = E1000_READ_REG(hw, E1000_FCRTC);
+ reg &= ~E1000_FCRTC_RTH_COAL_MASK;
+ reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
+ & E1000_FCRTC_RTH_COAL_MASK);
+ E1000_WRITE_REG(hw, E1000_FCRTC, reg);
+
+ /*
+ * Set the DMA Coalescing Rx threshold to PBA - 2 * max
+ * frame size, capping it at PBA - 10KB.
+ */
+ dmac_thr = pba - adapter->max_frame_size / 512;
+ if (dmac_thr < pba - 10)
+ dmac_thr = pba - 10;
+ reg = E1000_READ_REG(hw, E1000_DMACR);
+ reg &= ~E1000_DMACR_DMACTHR_MASK;
+ reg |= ((dmac_thr << E1000_DMACR_DMACTHR_SHIFT)
+ & E1000_DMACR_DMACTHR_MASK);
+
+ /* transition to L0x or L1 if available..*/
+ reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
+
+ /* Check if status is 2.5Gb backplane connection
+ * before configuration of watchdog timer, which is
+ * in msec values in 12.8usec intervals
+ * watchdog timer= msec values in 32usec intervals
+ * for non 2.5Gb connection
+ */
+ if (hw->mac.type == e1000_i354) {
+ status = E1000_READ_REG(hw, E1000_STATUS);
+ if ((status & E1000_STATUS_2P5_SKU) &&
+ (!(status & E1000_STATUS_2P5_SKU_OVER)))
+ reg |= ((adapter->dmac * 5) >> 6);
+ else
+ reg |= ((adapter->dmac) >> 5);
+ } else {
+ reg |= ((adapter->dmac) >> 5);
+ }
+
+ /*
+ * Disable BMC-to-OS Watchdog enable
+ * on devices that support OS-to-BMC
+ */
+ if (hw->mac.type != e1000_i354)
+ reg &= ~E1000_DMACR_DC_BMC2OSW_EN;
+ E1000_WRITE_REG(hw, E1000_DMACR, reg);
+
+ /* no lower threshold to disable coalescing(smart fifb)-UTRESH=0*/
+ E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
+
+ /* This sets the time to wait before requesting
+ * transition to low power state to number of usecs
+ * needed to receive 1 512 byte frame at gigabit
+ * line rate. On i350 device, time to make transition
+ * to Lx state is delayed by 4 usec with flush disable
+ * bit set to avoid losing mailbox interrupts
+ */
+ reg = E1000_READ_REG(hw, E1000_DMCTLX);
+ if (hw->mac.type == e1000_i350)
+ reg |= IGB_DMCTLX_DCFLUSH_DIS;
+
+ /* in 2.5Gb connection, TTLX unit is 0.4 usec
+ * which is 0x4*2 = 0xA. But delay is still 4 usec
+ */
+ if (hw->mac.type == e1000_i354) {
+ status = E1000_READ_REG(hw, E1000_STATUS);
+ if ((status & E1000_STATUS_2P5_SKU) &&
+ (!(status & E1000_STATUS_2P5_SKU_OVER)))
+ reg |= 0xA;
+ else
+ reg |= 0x4;
+ } else {
+ reg |= 0x4;
+ }
+ E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
+
+ /* free space in tx packet buffer to wake from DMA coal */
+ E1000_WRITE_REG(hw, E1000_DMCTXTH, (IGB_MIN_TXPBSIZE -
+ (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6);
+
+ /* make low power state decision controlled by DMA coal */
+ reg = E1000_READ_REG(hw, E1000_PCIEMISC);
+ reg &= ~E1000_PCIEMISC_LX_DECISION;
+ E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
+ } /* endif adapter->dmac is not disabled */
+ } else if (hw->mac.type == e1000_82580) {
+ u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
+ E1000_WRITE_REG(hw, E1000_PCIEMISC,
+ reg & ~E1000_PCIEMISC_LX_DECISION);
+ E1000_WRITE_REG(hw, E1000_DMACR, 0);
+ }
+}
+
+#ifdef HAVE_I2C_SUPPORT
+/* igb_read_i2c_byte - Reads 8 bit word over I2C
+ * @hw: pointer to hardware structure
+ * @byte_offset: byte offset to read
+ * @dev_addr: device address
+ * @data: value read
+ *
+ * Performs byte read operation over I2C interface at
+ * a specified device address.
+ */
+s32 igb_read_i2c_byte(struct e1000_hw *hw, u8 byte_offset,
+ u8 dev_addr, u8 *data)
+{
+ struct igb_adapter *adapter = container_of(hw, struct igb_adapter, hw);
+ struct i2c_client *this_client = adapter->i2c_client;
+ s32 status;
+ u16 swfw_mask = 0;
+
+ if (!this_client)
+ return E1000_ERR_I2C;
+
+ swfw_mask = E1000_SWFW_PHY0_SM;
+
+ if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask)
+ != E1000_SUCCESS)
+ return E1000_ERR_SWFW_SYNC;
+
+ status = i2c_smbus_read_byte_data(this_client, byte_offset);
+ hw->mac.ops.release_swfw_sync(hw, swfw_mask);
+
+ if (status < 0)
+ return E1000_ERR_I2C;
+ else {
+ *data = status;
+ return E1000_SUCCESS;
+ }
+}
+
+/* igb_write_i2c_byte - Writes 8 bit word over I2C
+ * @hw: pointer to hardware structure
+ * @byte_offset: byte offset to write
+ * @dev_addr: device address
+ * @data: value to write
+ *
+ * Performs byte write operation over I2C interface at
+ * a specified device address.
+ */
+s32 igb_write_i2c_byte(struct e1000_hw *hw, u8 byte_offset,
+ u8 dev_addr, u8 data)
+{
+ struct igb_adapter *adapter = container_of(hw, struct igb_adapter, hw);
+ struct i2c_client *this_client = adapter->i2c_client;
+ s32 status;
+ u16 swfw_mask = E1000_SWFW_PHY0_SM;
+
+ if (!this_client)
+ return E1000_ERR_I2C;
+
+ if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask) != E1000_SUCCESS)
+ return E1000_ERR_SWFW_SYNC;
+ status = i2c_smbus_write_byte_data(this_client, byte_offset, data);
+ hw->mac.ops.release_swfw_sync(hw, swfw_mask);
+
+ if (status)
+ return E1000_ERR_I2C;
+ else
+ return E1000_SUCCESS;
+}
+#endif /* HAVE_I2C_SUPPORT */
+/* igb_main.c */
+
+
+/**
+ * igb_probe - Device Initialization Routine
+ * @pdev: PCI device information struct
+ * @ent: entry in igb_pci_tbl
+ *
+ * Returns 0 on success, negative on failure
+ *
+ * igb_probe initializes an adapter identified by a pci_dev structure.
+ * The OS initialization, configuring of the adapter private structure,
+ * and a hardware reset occur.
+ **/
+int igb_kni_probe(struct pci_dev *pdev,
+ struct net_device **lad_dev)
+{
+ struct net_device *netdev;
+ struct igb_adapter *adapter;
+ struct e1000_hw *hw;
+ u16 eeprom_data = 0;
+ u8 pba_str[E1000_PBANUM_LENGTH];
+ s32 ret_val;
+ static int global_quad_port_a; /* global quad port a indication */
+ int i, err, pci_using_dac = 0;
+ static int cards_found;
+
+ err = pci_enable_device_mem(pdev);
+ if (err)
+ return err;
+
+#ifdef NO_KNI
+ pci_using_dac = 0;
+ err = dma_set_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(64));
+ if (!err) {
+ err = dma_set_coherent_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(64));
+ if (!err)
+ pci_using_dac = 1;
+ } else {
+ err = dma_set_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(32));
+ if (err) {
+ err = dma_set_coherent_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(32));
+ if (err) {
+ IGB_ERR("No usable DMA configuration, "
+ "aborting\n");
+ goto err_dma;
+ }
+ }
+ }
+
+#ifndef HAVE_ASPM_QUIRKS
+ /* 82575 requires that the pci-e link partner disable the L0s state */
+ switch (pdev->device) {
+ case E1000_DEV_ID_82575EB_COPPER:
+ case E1000_DEV_ID_82575EB_FIBER_SERDES:
+ case E1000_DEV_ID_82575GB_QUAD_COPPER:
+ pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S);
+ default:
+ break;
+ }
+
+#endif /* HAVE_ASPM_QUIRKS */
+ err = pci_request_selected_regions(pdev,
+ pci_select_bars(pdev,
+ IORESOURCE_MEM),
+ igb_driver_name);
+ if (err)
+ goto err_pci_reg;
+
+ pci_enable_pcie_error_reporting(pdev);
+
+ pci_set_master(pdev);
+
+ err = -ENOMEM;
+#endif /* NO_KNI */
+#ifdef HAVE_TX_MQ
+ netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
+ IGB_MAX_TX_QUEUES);
+#else
+ netdev = alloc_etherdev(sizeof(struct igb_adapter));
+#endif /* HAVE_TX_MQ */
+ if (!netdev)
+ goto err_alloc_etherdev;
+
+ SET_MODULE_OWNER(netdev);
+ SET_NETDEV_DEV(netdev, &pdev->dev);
+
+ //pci_set_drvdata(pdev, netdev);
+ adapter = netdev_priv(netdev);
+ adapter->netdev = netdev;
+ adapter->pdev = pdev;
+ hw = &adapter->hw;
+ hw->back = adapter;
+ adapter->port_num = hw->bus.func;
+ adapter->msg_enable = (1 << debug) - 1;
+
+#ifdef HAVE_PCI_ERS
+ err = pci_save_state(pdev);
+ if (err)
+ goto err_ioremap;
+#endif
+ err = -EIO;
+ hw->hw_addr = ioremap(pci_resource_start(pdev, 0),
+ pci_resource_len(pdev, 0));
+ if (!hw->hw_addr)
+ goto err_ioremap;
+
+#ifdef HAVE_NET_DEVICE_OPS
+ netdev->netdev_ops = &igb_netdev_ops;
+#else /* HAVE_NET_DEVICE_OPS */
+ netdev->open = &igb_open;
+ netdev->stop = &igb_close;
+ netdev->get_stats = &igb_get_stats;
+#ifdef HAVE_SET_RX_MODE
+ netdev->set_rx_mode = &igb_set_rx_mode;
+#endif
+ netdev->set_multicast_list = &igb_set_rx_mode;
+ netdev->set_mac_address = &igb_set_mac;
+ netdev->change_mtu = &igb_change_mtu;
+ netdev->do_ioctl = &igb_ioctl;
+#ifdef HAVE_TX_TIMEOUT
+ netdev->tx_timeout = &igb_tx_timeout;
+#endif
+ netdev->vlan_rx_register = igb_vlan_mode;
+ netdev->vlan_rx_add_vid = igb_vlan_rx_add_vid;
+ netdev->vlan_rx_kill_vid = igb_vlan_rx_kill_vid;
+#ifdef CONFIG_NET_POLL_CONTROLLER
+ netdev->poll_controller = igb_netpoll;
+#endif
+ netdev->hard_start_xmit = &igb_xmit_frame;
+#endif /* HAVE_NET_DEVICE_OPS */
+ igb_set_ethtool_ops(netdev);
+#ifdef HAVE_TX_TIMEOUT
+ netdev->watchdog_timeo = 5 * HZ;
+#endif
+
+ strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
+
+ adapter->bd_number = cards_found;
+
+ /* setup the private structure */
+ err = igb_sw_init(adapter);
+ if (err)
+ goto err_sw_init;
+
+ e1000_get_bus_info(hw);
+
+ hw->phy.autoneg_wait_to_complete = FALSE;
+ hw->mac.adaptive_ifs = FALSE;
+
+ /* Copper options */
+ if (hw->phy.media_type == e1000_media_type_copper) {
+ hw->phy.mdix = AUTO_ALL_MODES;
+ hw->phy.disable_polarity_correction = FALSE;
+ hw->phy.ms_type = e1000_ms_hw_default;
+ }
+
+ if (e1000_check_reset_block(hw))
+ dev_info(pci_dev_to_dev(pdev),
+ "PHY reset is blocked due to SOL/IDER session.\n");
+
+ /*
+ * features is initialized to 0 in allocation, it might have bits
+ * set by igb_sw_init so we should use an or instead of an
+ * assignment.
+ */
+ netdev->features |= NETIF_F_SG |
+ NETIF_F_IP_CSUM |
+#ifdef NETIF_F_IPV6_CSUM
+ NETIF_F_IPV6_CSUM |
+#endif
+#ifdef NETIF_F_TSO
+ NETIF_F_TSO |
+#ifdef NETIF_F_TSO6
+ NETIF_F_TSO6 |
+#endif
+#endif /* NETIF_F_TSO */
+#ifdef NETIF_F_RXHASH
+ NETIF_F_RXHASH |
+#endif
+ NETIF_F_RXCSUM |
+#ifdef NETIF_F_HW_VLAN_CTAG_RX
+ NETIF_F_HW_VLAN_CTAG_RX |
+ NETIF_F_HW_VLAN_CTAG_TX;
+#else
+ NETIF_F_HW_VLAN_RX |
+ NETIF_F_HW_VLAN_TX;
+#endif
+
+ if (hw->mac.type >= e1000_82576)
+ netdev->features |= NETIF_F_SCTP_CSUM;
+
+#ifdef HAVE_NDO_SET_FEATURES
+ /* copy netdev features into list of user selectable features */
+ netdev->hw_features |= netdev->features;
+#ifndef IGB_NO_LRO
+
+ /* give us the option of enabling LRO later */
+ netdev->hw_features |= NETIF_F_LRO;
+#endif
+#else
+#ifdef NETIF_F_GRO
+
+ /* this is only needed on kernels prior to 2.6.39 */
+ netdev->features |= NETIF_F_GRO;
+#endif
+#endif
+
+ /* set this bit last since it cannot be part of hw_features */
+#ifdef NETIF_F_HW_VLAN_CTAG_FILTER
+ netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+#else
+ netdev->features |= NETIF_F_HW_VLAN_FILTER;
+#endif
+
+#ifdef HAVE_NETDEV_VLAN_FEATURES
+ netdev->vlan_features |= NETIF_F_TSO |
+ NETIF_F_TSO6 |
+ NETIF_F_IP_CSUM |
+ NETIF_F_IPV6_CSUM |
+ NETIF_F_SG;
+
+#endif
+ if (pci_using_dac)
+ netdev->features |= NETIF_F_HIGHDMA;
+
+#ifdef NO_KNI
+ adapter->en_mng_pt = e1000_enable_mng_pass_thru(hw);
+#ifdef DEBUG
+ if (adapter->dmac != IGB_DMAC_DISABLE)
+ printk("%s: DMA Coalescing is enabled..\n", netdev->name);
+#endif
+
+ /* before reading the NVM, reset the controller to put the device in a
+ * known good starting state */
+ e1000_reset_hw(hw);
+#endif /* NO_KNI */
+
+ /* make sure the NVM is good */
+ if (e1000_validate_nvm_checksum(hw) < 0) {
+ dev_err(pci_dev_to_dev(pdev), "The NVM Checksum Is Not"
+ " Valid\n");
+ err = -EIO;
+ goto err_eeprom;
+ }
+
+ /* copy the MAC address out of the NVM */
+ if (e1000_read_mac_addr(hw))
+ dev_err(pci_dev_to_dev(pdev), "NVM Read Error\n");
+ memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
+#ifdef ETHTOOL_GPERMADDR
+ memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
+
+ if (!is_valid_ether_addr(netdev->perm_addr)) {
+#else
+ if (!is_valid_ether_addr(netdev->dev_addr)) {
+#endif
+ dev_err(pci_dev_to_dev(pdev), "Invalid MAC Address\n");
+ err = -EIO;
+ goto err_eeprom;
+ }
+
+ memcpy(&adapter->mac_table[0].addr, hw->mac.addr, netdev->addr_len);
+ adapter->mac_table[0].queue = adapter->vfs_allocated_count;
+ adapter->mac_table[0].state = (IGB_MAC_STATE_DEFAULT | IGB_MAC_STATE_IN_USE);
+ igb_rar_set(adapter, 0);
+
+ /* get firmware version for ethtool -i */
+ igb_set_fw_version(adapter);
+
+ /* Check if Media Autosense is enabled */
+ if (hw->mac.type == e1000_82580)
+ igb_init_mas(adapter);
+
+#ifdef NO_KNI
+ setup_timer(&adapter->watchdog_timer, &igb_watchdog,
+ (unsigned long) adapter);
+ if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA)
+ setup_timer(&adapter->dma_err_timer, &igb_dma_err_timer,
+ (unsigned long) adapter);
+ setup_timer(&adapter->phy_info_timer, &igb_update_phy_info,
+ (unsigned long) adapter);
+
+ INIT_WORK(&adapter->reset_task, igb_reset_task);
+ INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
+ if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA)
+ INIT_WORK(&adapter->dma_err_task, igb_dma_err_task);
+#endif
+
+ /* Initialize link properties that are user-changeable */
+ adapter->fc_autoneg = true;
+ hw->mac.autoneg = true;
+ hw->phy.autoneg_advertised = 0x2f;
+
+ hw->fc.requested_mode = e1000_fc_default;
+ hw->fc.current_mode = e1000_fc_default;
+
+ e1000_validate_mdi_setting(hw);
+
+ /* By default, support wake on port A */
+ if (hw->bus.func == 0)
+ adapter->flags |= IGB_FLAG_WOL_SUPPORTED;
+
+ /* Check the NVM for wake support for non-port A ports */
+ if (hw->mac.type >= e1000_82580)
+ hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
+ NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
+ &eeprom_data);
+ else if (hw->bus.func == 1)
+ e1000_read_nvm(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
+
+ if (eeprom_data & IGB_EEPROM_APME)
+ adapter->flags |= IGB_FLAG_WOL_SUPPORTED;
+
+ /* now that we have the eeprom settings, apply the special cases where
+ * the eeprom may be wrong or the board simply won't support wake on
+ * lan on a particular port */
+ switch (pdev->device) {
+ case E1000_DEV_ID_82575GB_QUAD_COPPER:
+ adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED;
+ break;
+ case E1000_DEV_ID_82575EB_FIBER_SERDES:
+ case E1000_DEV_ID_82576_FIBER:
+ case E1000_DEV_ID_82576_SERDES:
+ /* Wake events only supported on port A for dual fiber
+ * regardless of eeprom setting */
+ if (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_FUNC_1)
+ adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED;
+ break;
+ case E1000_DEV_ID_82576_QUAD_COPPER:
+ case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
+ /* if quad port adapter, disable WoL on all but port A */
+ if (global_quad_port_a != 0)
+ adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED;
+ else
+ adapter->flags |= IGB_FLAG_QUAD_PORT_A;
+ /* Reset for multiple quad port adapters */
+ if (++global_quad_port_a == 4)
+ global_quad_port_a = 0;
+ break;
+ default:
+ /* If the device can't wake, don't set software support */
+ if (!device_can_wakeup(&adapter->pdev->dev))
+ adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED;
+ break;
+ }
+
+ /* initialize the wol settings based on the eeprom settings */
+ if (adapter->flags & IGB_FLAG_WOL_SUPPORTED)
+ adapter->wol |= E1000_WUFC_MAG;
+
+ /* Some vendors want WoL disabled by default, but still supported */
+ if ((hw->mac.type == e1000_i350) &&
+ (pdev->subsystem_vendor == PCI_VENDOR_ID_HP)) {
+ adapter->flags |= IGB_FLAG_WOL_SUPPORTED;
+ adapter->wol = 0;
+ }
+
+#ifdef NO_KNI
+ device_set_wakeup_enable(pci_dev_to_dev(adapter->pdev),
+ adapter->flags & IGB_FLAG_WOL_SUPPORTED);
+
+ /* reset the hardware with the new settings */
+ igb_reset(adapter);
+ adapter->devrc = 0;
+
+#ifdef HAVE_I2C_SUPPORT
+ /* Init the I2C interface */
+ err = igb_init_i2c(adapter);
+ if (err) {
+ dev_err(&pdev->dev, "failed to init i2c interface\n");
+ goto err_eeprom;
+ }
+#endif /* HAVE_I2C_SUPPORT */
+
+ /* let the f/w know that the h/w is now under the control of the
+ * driver. */
+ igb_get_hw_control(adapter);
+
+ strncpy(netdev->name, "eth%d", IFNAMSIZ);
+ err = register_netdev(netdev);
+ if (err)
+ goto err_register;
+
+#ifdef CONFIG_IGB_VMDQ_NETDEV
+ err = igb_init_vmdq_netdevs(adapter);
+ if (err)
+ goto err_register;
+#endif
+ /* carrier off reporting is important to ethtool even BEFORE open */
+ netif_carrier_off(netdev);
+
+#ifdef IGB_DCA
+ if (dca_add_requester(&pdev->dev) == E1000_SUCCESS) {
+ adapter->flags |= IGB_FLAG_DCA_ENABLED;
+ dev_info(pci_dev_to_dev(pdev), "DCA enabled\n");
+ igb_setup_dca(adapter);
+ }
+
+#endif
+#ifdef HAVE_PTP_1588_CLOCK
+ /* do hw tstamp init after resetting */
+ igb_ptp_init(adapter);
+#endif /* HAVE_PTP_1588_CLOCK */
+
+#endif /* NO_KNI */
+ dev_info(pci_dev_to_dev(pdev), "Intel(R) Gigabit Ethernet Network Connection\n");
+ /* print bus type/speed/width info */
+ dev_info(pci_dev_to_dev(pdev), "%s: (PCIe:%s:%s) ",
+ netdev->name,
+ ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5GT/s" :
+ (hw->bus.speed == e1000_bus_speed_5000) ? "5.0GT/s" :
+ (hw->mac.type == e1000_i354) ? "integrated" :
+ "unknown"),
+ ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
+ (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
+ (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
+ (hw->mac.type == e1000_i354) ? "integrated" :
+ "unknown"));
+ dev_info(pci_dev_to_dev(pdev), "%s: MAC: ", netdev->name);
+ for (i = 0; i < 6; i++)
+ printk("%2.2x%c", netdev->dev_addr[i], i == 5 ? '\n' : ':');
+
+ ret_val = e1000_read_pba_string(hw, pba_str, E1000_PBANUM_LENGTH);
+ if (ret_val)
+ strncpy(pba_str, "Unknown", sizeof(pba_str) - 1);
+ dev_info(pci_dev_to_dev(pdev), "%s: PBA No: %s\n", netdev->name,
+ pba_str);
+
+
+ /* Initialize the thermal sensor on i350 devices. */
+ if (hw->mac.type == e1000_i350) {
+ if (hw->bus.func == 0) {
+ u16 ets_word;
+
+ /*
+ * Read the NVM to determine if this i350 device
+ * supports an external thermal sensor.
+ */
+ e1000_read_nvm(hw, NVM_ETS_CFG, 1, &ets_word);
+ if (ets_word != 0x0000 && ets_word != 0xFFFF)
+ adapter->ets = true;
+ else
+ adapter->ets = false;
+ }
+#ifdef NO_KNI
+#ifdef IGB_HWMON
+
+ igb_sysfs_init(adapter);
+#else
+#ifdef IGB_PROCFS
+
+ igb_procfs_init(adapter);
+#endif /* IGB_PROCFS */
+#endif /* IGB_HWMON */
+#endif /* NO_KNI */
+ } else {
+ adapter->ets = false;
+ }
+
+ if (hw->phy.media_type == e1000_media_type_copper) {
+ switch (hw->mac.type) {
+ case e1000_i350:
+ case e1000_i210:
+ case e1000_i211:
+ /* Enable EEE for internal copper PHY devices */
+ err = e1000_set_eee_i350(hw);
+ if ((!err) &&
+ (adapter->flags & IGB_FLAG_EEE))
+ adapter->eee_advert =
+ MDIO_EEE_100TX | MDIO_EEE_1000T;
+ break;
+ case e1000_i354:
+ if ((E1000_READ_REG(hw, E1000_CTRL_EXT)) &
+ (E1000_CTRL_EXT_LINK_MODE_SGMII)) {
+ err = e1000_set_eee_i354(hw);
+ if ((!err) &&
+ (adapter->flags & IGB_FLAG_EEE))
+ adapter->eee_advert =
+ MDIO_EEE_100TX | MDIO_EEE_1000T;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ /* send driver version info to firmware */
+ if (hw->mac.type >= e1000_i350)
+ igb_init_fw(adapter);
+
+#ifndef IGB_NO_LRO
+ if (netdev->features & NETIF_F_LRO)
+ dev_info(pci_dev_to_dev(pdev), "Internal LRO is enabled \n");
+ else
+ dev_info(pci_dev_to_dev(pdev), "LRO is disabled \n");
+#endif
+ dev_info(pci_dev_to_dev(pdev),
+ "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
+ adapter->msix_entries ? "MSI-X" :
+ (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
+ adapter->num_rx_queues, adapter->num_tx_queues);
+
+ cards_found++;
+ *lad_dev = netdev;
+
+ pm_runtime_put_noidle(&pdev->dev);
+ return 0;
+
+//err_register:
+// igb_release_hw_control(adapter);
+#ifdef HAVE_I2C_SUPPORT
+ memset(&adapter->i2c_adap, 0, sizeof(adapter->i2c_adap));
+#endif /* HAVE_I2C_SUPPORT */
+err_eeprom:
+// if (!e1000_check_reset_block(hw))
+// e1000_phy_hw_reset(hw);
+
+ if (hw->flash_address)
+ iounmap(hw->flash_address);
+err_sw_init:
+// igb_clear_interrupt_scheme(adapter);
+// igb_reset_sriov_capability(adapter);
+ iounmap(hw->hw_addr);
+err_ioremap:
+ free_netdev(netdev);
+err_alloc_etherdev:
+// pci_release_selected_regions(pdev,
+// pci_select_bars(pdev, IORESOURCE_MEM));
+//err_pci_reg:
+//err_dma:
+ pci_disable_device(pdev);
+ return err;
+}
+
+
+void igb_kni_remove(struct pci_dev *pdev)
+{
+ pci_disable_device(pdev);
+}
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_param.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_param.c
new file mode 100644
index 00000000..f79ce7c1
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_param.c
@@ -0,0 +1,847 @@
+/*******************************************************************************
+
+ Intel(R) Gigabit Ethernet Linux driver
+ Copyright(c) 2007-2013 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+
+#include <linux/netdevice.h>
+
+#include "igb.h"
+
+/* This is the only thing that needs to be changed to adjust the
+ * maximum number of ports that the driver can manage.
+ */
+
+#define IGB_MAX_NIC 32
+
+#define OPTION_UNSET -1
+#define OPTION_DISABLED 0
+#define OPTION_ENABLED 1
+#define MAX_NUM_LIST_OPTS 15
+
+/* All parameters are treated the same, as an integer array of values.
+ * This macro just reduces the need to repeat the same declaration code
+ * over and over (plus this helps to avoid typo bugs).
+ */
+
+#define IGB_PARAM_INIT { [0 ... IGB_MAX_NIC] = OPTION_UNSET }
+#ifndef module_param_array
+/* Module Parameters are always initialized to -1, so that the driver
+ * can tell the difference between no user specified value or the
+ * user asking for the default value.
+ * The true default values are loaded in when igb_check_options is called.
+ *
+ * This is a GCC extension to ANSI C.
+ * See the item "Labeled Elements in Initializers" in the section
+ * "Extensions to the C Language Family" of the GCC documentation.
+ */
+
+#define IGB_PARAM(X, desc) \
+ static const int X[IGB_MAX_NIC+1] = IGB_PARAM_INIT; \
+ MODULE_PARM(X, "1-" __MODULE_STRING(IGB_MAX_NIC) "i"); \
+ MODULE_PARM_DESC(X, desc);
+#else
+#define IGB_PARAM(X, desc) \
+ static int X[IGB_MAX_NIC+1] = IGB_PARAM_INIT; \
+ static unsigned int num_##X; \
+ module_param_array_named(X, X, int, &num_##X, 0); \
+ MODULE_PARM_DESC(X, desc);
+#endif
+
+/* Interrupt Throttle Rate (interrupts/sec)
+ *
+ * Valid Range: 100-100000 (0=off, 1=dynamic, 3=dynamic conservative)
+ */
+IGB_PARAM(InterruptThrottleRate,
+ "Maximum interrupts per second, per vector, (max 100000), default 3=adaptive");
+#define DEFAULT_ITR 3
+#define MAX_ITR 100000
+/* #define MIN_ITR 120 */
+#define MIN_ITR 0
+/* IntMode (Interrupt Mode)
+ *
+ * Valid Range: 0 - 2
+ *
+ * Default Value: 2 (MSI-X)
+ */
+IGB_PARAM(IntMode, "Change Interrupt Mode (0=Legacy, 1=MSI, 2=MSI-X), default 2");
+#define MAX_INTMODE IGB_INT_MODE_MSIX
+#define MIN_INTMODE IGB_INT_MODE_LEGACY
+
+IGB_PARAM(Node, "set the starting node to allocate memory on, default -1");
+
+/* LLIPort (Low Latency Interrupt TCP Port)
+ *
+ * Valid Range: 0 - 65535
+ *
+ * Default Value: 0 (disabled)
+ */
+IGB_PARAM(LLIPort, "Low Latency Interrupt TCP Port (0-65535), default 0=off");
+
+#define DEFAULT_LLIPORT 0
+#define MAX_LLIPORT 0xFFFF
+#define MIN_LLIPORT 0
+
+/* LLIPush (Low Latency Interrupt on TCP Push flag)
+ *
+ * Valid Range: 0, 1
+ *
+ * Default Value: 0 (disabled)
+ */
+IGB_PARAM(LLIPush, "Low Latency Interrupt on TCP Push flag (0,1), default 0=off");
+
+#define DEFAULT_LLIPUSH 0
+#define MAX_LLIPUSH 1
+#define MIN_LLIPUSH 0
+
+/* LLISize (Low Latency Interrupt on Packet Size)
+ *
+ * Valid Range: 0 - 1500
+ *
+ * Default Value: 0 (disabled)
+ */
+IGB_PARAM(LLISize, "Low Latency Interrupt on Packet Size (0-1500), default 0=off");
+
+#define DEFAULT_LLISIZE 0
+#define MAX_LLISIZE 1500
+#define MIN_LLISIZE 0
+
+/* RSS (Enable RSS multiqueue receive)
+ *
+ * Valid Range: 0 - 8
+ *
+ * Default Value: 1
+ */
+IGB_PARAM(RSS, "Number of Receive-Side Scaling Descriptor Queues (0-8), default 1, 0=number of cpus");
+
+#define DEFAULT_RSS 1
+#define MAX_RSS 8
+#define MIN_RSS 0
+
+/* VMDQ (Enable VMDq multiqueue receive)
+ *
+ * Valid Range: 0 - 8
+ *
+ * Default Value: 0
+ */
+IGB_PARAM(VMDQ, "Number of Virtual Machine Device Queues: 0-1 = disable, 2-8 enable, default 0");
+
+#define DEFAULT_VMDQ 0
+#define MAX_VMDQ MAX_RSS
+#define MIN_VMDQ 0
+
+/* max_vfs (Enable SR-IOV VF devices)
+ *
+ * Valid Range: 0 - 7
+ *
+ * Default Value: 0
+ */
+IGB_PARAM(max_vfs, "Number of Virtual Functions: 0 = disable, 1-7 enable, default 0");
+
+#define DEFAULT_SRIOV 0
+#define MAX_SRIOV 7
+#define MIN_SRIOV 0
+
+/* MDD (Enable Malicious Driver Detection)
+ *
+ * Only available when SR-IOV is enabled - max_vfs is greater than 0
+ *
+ * Valid Range: 0, 1
+ *
+ * Default Value: 1
+ */
+IGB_PARAM(MDD, "Malicious Driver Detection (0/1), default 1 = enabled. "
+ "Only available when max_vfs is greater than 0");
+
+#ifdef DEBUG
+
+/* Disable Hardware Reset on Tx Hang
+ *
+ * Valid Range: 0, 1
+ *
+ * Default Value: 0 (disabled, i.e. h/w will reset)
+ */
+IGB_PARAM(DisableHwReset, "Disable reset of hardware on Tx hang");
+
+/* Dump Transmit and Receive buffers
+ *
+ * Valid Range: 0, 1
+ *
+ * Default Value: 0
+ */
+IGB_PARAM(DumpBuffers, "Dump Tx/Rx buffers on Tx hang or by request");
+
+#endif /* DEBUG */
+
+/* QueuePairs (Enable TX/RX queue pairs for interrupt handling)
+ *
+ * Valid Range: 0 - 1
+ *
+ * Default Value: 1
+ */
+IGB_PARAM(QueuePairs, "Enable Tx/Rx queue pairs for interrupt handling (0,1), default 1=on");
+
+#define DEFAULT_QUEUE_PAIRS 1
+#define MAX_QUEUE_PAIRS 1
+#define MIN_QUEUE_PAIRS 0
+
+/* Enable/disable EEE (a.k.a. IEEE802.3az)
+ *
+ * Valid Range: 0, 1
+ *
+ * Default Value: 1
+ */
+ IGB_PARAM(EEE, "Enable/disable on parts that support the feature");
+
+/* Enable/disable DMA Coalescing
+ *
+ * Valid Values: 0(off), 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000,
+ * 9000, 10000(msec), 250(usec), 500(usec)
+ *
+ * Default Value: 0
+ */
+ IGB_PARAM(DMAC, "Disable or set latency for DMA Coalescing ((0=off, 1000-10000(msec), 250, 500 (usec))");
+
+#ifndef IGB_NO_LRO
+/* Enable/disable Large Receive Offload
+ *
+ * Valid Values: 0(off), 1(on)
+ *
+ * Default Value: 0
+ */
+ IGB_PARAM(LRO, "Large Receive Offload (0,1), default 0=off");
+
+#endif
+struct igb_opt_list {
+ int i;
+ char *str;
+};
+struct igb_option {
+ enum { enable_option, range_option, list_option } type;
+ const char *name;
+ const char *err;
+ int def;
+ union {
+ struct { /* range_option info */
+ int min;
+ int max;
+ } r;
+ struct { /* list_option info */
+ int nr;
+ struct igb_opt_list *p;
+ } l;
+ } arg;
+};
+
+static int igb_validate_option(unsigned int *value,
+ struct igb_option *opt,
+ struct igb_adapter *adapter)
+{
+ if (*value == OPTION_UNSET) {
+ *value = opt->def;
+ return 0;
+ }
+
+ switch (opt->type) {
+ case enable_option:
+ switch (*value) {
+ case OPTION_ENABLED:
+ DPRINTK(PROBE, INFO, "%s Enabled\n", opt->name);
+ return 0;
+ case OPTION_DISABLED:
+ DPRINTK(PROBE, INFO, "%s Disabled\n", opt->name);
+ return 0;
+ }
+ break;
+ case range_option:
+ if (*value >= opt->arg.r.min && *value <= opt->arg.r.max) {
+ DPRINTK(PROBE, INFO,
+ "%s set to %d\n", opt->name, *value);
+ return 0;
+ }
+ break;
+ case list_option: {
+ int i;
+ struct igb_opt_list *ent;
+
+ for (i = 0; i < opt->arg.l.nr; i++) {
+ ent = &opt->arg.l.p[i];
+ if (*value == ent->i) {
+ if (ent->str[0] != '\0')
+ DPRINTK(PROBE, INFO, "%s\n", ent->str);
+ return 0;
+ }
+ }
+ }
+ break;
+ default:
+ BUG();
+ }
+
+ DPRINTK(PROBE, INFO, "Invalid %s value specified (%d) %s\n",
+ opt->name, *value, opt->err);
+ *value = opt->def;
+ return -1;
+}
+
+/**
+ * igb_check_options - Range Checking for Command Line Parameters
+ * @adapter: board private structure
+ *
+ * This routine checks all command line parameters for valid user
+ * input. If an invalid value is given, or if no user specified
+ * value exists, a default value is used. The final value is stored
+ * in a variable in the adapter structure.
+ **/
+
+void igb_check_options(struct igb_adapter *adapter)
+{
+ int bd = adapter->bd_number;
+ struct e1000_hw *hw = &adapter->hw;
+
+ if (bd >= IGB_MAX_NIC) {
+ DPRINTK(PROBE, NOTICE,
+ "Warning: no configuration for board #%d\n", bd);
+ DPRINTK(PROBE, NOTICE, "Using defaults for all values\n");
+#ifndef module_param_array
+ bd = IGB_MAX_NIC;
+#endif
+ }
+
+ { /* Interrupt Throttling Rate */
+ struct igb_option opt = {
+ .type = range_option,
+ .name = "Interrupt Throttling Rate (ints/sec)",
+ .err = "using default of " __MODULE_STRING(DEFAULT_ITR),
+ .def = DEFAULT_ITR,
+ .arg = { .r = { .min = MIN_ITR,
+ .max = MAX_ITR } }
+ };
+
+#ifdef module_param_array
+ if (num_InterruptThrottleRate > bd) {
+#endif
+ unsigned int itr = InterruptThrottleRate[bd];
+
+ switch (itr) {
+ case 0:
+ DPRINTK(PROBE, INFO, "%s turned off\n",
+ opt.name);
+ if (hw->mac.type >= e1000_i350)
+ adapter->dmac = IGB_DMAC_DISABLE;
+ adapter->rx_itr_setting = itr;
+ break;
+ case 1:
+ DPRINTK(PROBE, INFO, "%s set to dynamic mode\n",
+ opt.name);
+ adapter->rx_itr_setting = itr;
+ break;
+ case 3:
+ DPRINTK(PROBE, INFO,
+ "%s set to dynamic conservative mode\n",
+ opt.name);
+ adapter->rx_itr_setting = itr;
+ break;
+ default:
+ igb_validate_option(&itr, &opt, adapter);
+ /* Save the setting, because the dynamic bits
+ * change itr. In case of invalid user value,
+ * default to conservative mode, else need to
+ * clear the lower two bits because they are
+ * used as control */
+ if (itr == 3) {
+ adapter->rx_itr_setting = itr;
+ } else {
+ adapter->rx_itr_setting = 1000000000 /
+ (itr * 256);
+ adapter->rx_itr_setting &= ~3;
+ }
+ break;
+ }
+#ifdef module_param_array
+ } else {
+ adapter->rx_itr_setting = opt.def;
+ }
+#endif
+ adapter->tx_itr_setting = adapter->rx_itr_setting;
+ }
+ { /* Interrupt Mode */
+ struct igb_option opt = {
+ .type = range_option,
+ .name = "Interrupt Mode",
+ .err = "defaulting to 2 (MSI-X)",
+ .def = IGB_INT_MODE_MSIX,
+ .arg = { .r = { .min = MIN_INTMODE,
+ .max = MAX_INTMODE } }
+ };
+
+#ifdef module_param_array
+ if (num_IntMode > bd) {
+#endif
+ unsigned int int_mode = IntMode[bd];
+ igb_validate_option(&int_mode, &opt, adapter);
+ adapter->int_mode = int_mode;
+#ifdef module_param_array
+ } else {
+ adapter->int_mode = opt.def;
+ }
+#endif
+ }
+ { /* Low Latency Interrupt TCP Port */
+ struct igb_option opt = {
+ .type = range_option,
+ .name = "Low Latency Interrupt TCP Port",
+ .err = "using default of " __MODULE_STRING(DEFAULT_LLIPORT),
+ .def = DEFAULT_LLIPORT,
+ .arg = { .r = { .min = MIN_LLIPORT,
+ .max = MAX_LLIPORT } }
+ };
+
+#ifdef module_param_array
+ if (num_LLIPort > bd) {
+#endif
+ adapter->lli_port = LLIPort[bd];
+ if (adapter->lli_port) {
+ igb_validate_option(&adapter->lli_port, &opt,
+ adapter);
+ } else {
+ DPRINTK(PROBE, INFO, "%s turned off\n",
+ opt.name);
+ }
+#ifdef module_param_array
+ } else {
+ adapter->lli_port = opt.def;
+ }
+#endif
+ }
+ { /* Low Latency Interrupt on Packet Size */
+ struct igb_option opt = {
+ .type = range_option,
+ .name = "Low Latency Interrupt on Packet Size",
+ .err = "using default of " __MODULE_STRING(DEFAULT_LLISIZE),
+ .def = DEFAULT_LLISIZE,
+ .arg = { .r = { .min = MIN_LLISIZE,
+ .max = MAX_LLISIZE } }
+ };
+
+#ifdef module_param_array
+ if (num_LLISize > bd) {
+#endif
+ adapter->lli_size = LLISize[bd];
+ if (adapter->lli_size) {
+ igb_validate_option(&adapter->lli_size, &opt,
+ adapter);
+ } else {
+ DPRINTK(PROBE, INFO, "%s turned off\n",
+ opt.name);
+ }
+#ifdef module_param_array
+ } else {
+ adapter->lli_size = opt.def;
+ }
+#endif
+ }
+ { /* Low Latency Interrupt on TCP Push flag */
+ struct igb_option opt = {
+ .type = enable_option,
+ .name = "Low Latency Interrupt on TCP Push flag",
+ .err = "defaulting to Disabled",
+ .def = OPTION_DISABLED
+ };
+
+#ifdef module_param_array
+ if (num_LLIPush > bd) {
+#endif
+ unsigned int lli_push = LLIPush[bd];
+ igb_validate_option(&lli_push, &opt, adapter);
+ adapter->flags |= lli_push ? IGB_FLAG_LLI_PUSH : 0;
+#ifdef module_param_array
+ } else {
+ adapter->flags |= opt.def ? IGB_FLAG_LLI_PUSH : 0;
+ }
+#endif
+ }
+ { /* SRIOV - Enable SR-IOV VF devices */
+ struct igb_option opt = {
+ .type = range_option,
+ .name = "max_vfs - SR-IOV VF devices",
+ .err = "using default of " __MODULE_STRING(DEFAULT_SRIOV),
+ .def = DEFAULT_SRIOV,
+ .arg = { .r = { .min = MIN_SRIOV,
+ .max = MAX_SRIOV } }
+ };
+
+#ifdef module_param_array
+ if (num_max_vfs > bd) {
+#endif
+ adapter->vfs_allocated_count = max_vfs[bd];
+ igb_validate_option(&adapter->vfs_allocated_count, &opt, adapter);
+
+#ifdef module_param_array
+ } else {
+ adapter->vfs_allocated_count = opt.def;
+ }
+#endif
+ if (adapter->vfs_allocated_count) {
+ switch (hw->mac.type) {
+ case e1000_82575:
+ case e1000_82580:
+ case e1000_i210:
+ case e1000_i211:
+ case e1000_i354:
+ adapter->vfs_allocated_count = 0;
+ DPRINTK(PROBE, INFO, "SR-IOV option max_vfs not supported.\n");
+ default:
+ break;
+ }
+ }
+ }
+ { /* VMDQ - Enable VMDq multiqueue receive */
+ struct igb_option opt = {
+ .type = range_option,
+ .name = "VMDQ - VMDq multiqueue queue count",
+ .err = "using default of " __MODULE_STRING(DEFAULT_VMDQ),
+ .def = DEFAULT_VMDQ,
+ .arg = { .r = { .min = MIN_VMDQ,
+ .max = (MAX_VMDQ - adapter->vfs_allocated_count) } }
+ };
+ if ((hw->mac.type != e1000_i210) ||
+ (hw->mac.type != e1000_i211)) {
+#ifdef module_param_array
+ if (num_VMDQ > bd) {
+#endif
+ adapter->vmdq_pools = (VMDQ[bd] == 1 ? 0 : VMDQ[bd]);
+ if (adapter->vfs_allocated_count && !adapter->vmdq_pools) {
+ DPRINTK(PROBE, INFO, "Enabling SR-IOV requires VMDq be set to at least 1\n");
+ adapter->vmdq_pools = 1;
+ }
+ igb_validate_option(&adapter->vmdq_pools, &opt, adapter);
+
+#ifdef module_param_array
+ } else {
+ if (!adapter->vfs_allocated_count)
+ adapter->vmdq_pools = (opt.def == 1 ? 0 : opt.def);
+ else
+ adapter->vmdq_pools = 1;
+ }
+#endif
+#ifdef CONFIG_IGB_VMDQ_NETDEV
+ if (hw->mac.type == e1000_82575 && adapter->vmdq_pools) {
+ DPRINTK(PROBE, INFO, "VMDq not supported on this part.\n");
+ adapter->vmdq_pools = 0;
+ }
+#endif
+
+ } else {
+ DPRINTK(PROBE, INFO, "VMDq option is not supported.\n");
+ adapter->vmdq_pools = opt.def;
+ }
+ }
+ { /* RSS - Enable RSS multiqueue receives */
+ struct igb_option opt = {
+ .type = range_option,
+ .name = "RSS - RSS multiqueue receive count",
+ .err = "using default of " __MODULE_STRING(DEFAULT_RSS),
+ .def = DEFAULT_RSS,
+ .arg = { .r = { .min = MIN_RSS,
+ .max = MAX_RSS } }
+ };
+
+ switch (hw->mac.type) {
+ case e1000_82575:
+#ifndef CONFIG_IGB_VMDQ_NETDEV
+ if (!!adapter->vmdq_pools) {
+ if (adapter->vmdq_pools <= 2) {
+ if (adapter->vmdq_pools == 2)
+ opt.arg.r.max = 3;
+ } else {
+ opt.arg.r.max = 1;
+ }
+ } else {
+ opt.arg.r.max = 4;
+ }
+#else
+ opt.arg.r.max = !!adapter->vmdq_pools ? 1 : 4;
+#endif /* CONFIG_IGB_VMDQ_NETDEV */
+ break;
+ case e1000_i210:
+ opt.arg.r.max = 4;
+ break;
+ case e1000_i211:
+ opt.arg.r.max = 2;
+ break;
+ case e1000_82576:
+#ifndef CONFIG_IGB_VMDQ_NETDEV
+ if (!!adapter->vmdq_pools)
+ opt.arg.r.max = 2;
+ break;
+#endif /* CONFIG_IGB_VMDQ_NETDEV */
+ case e1000_82580:
+ case e1000_i350:
+ case e1000_i354:
+ default:
+ if (!!adapter->vmdq_pools)
+ opt.arg.r.max = 1;
+ break;
+ }
+
+ if (adapter->int_mode != IGB_INT_MODE_MSIX) {
+ DPRINTK(PROBE, INFO, "RSS is not supported when in MSI/Legacy Interrupt mode, %s\n",
+ opt.err);
+ opt.arg.r.max = 1;
+ }
+
+#ifdef module_param_array
+ if (num_RSS > bd) {
+#endif
+ adapter->rss_queues = RSS[bd];
+ switch (adapter->rss_queues) {
+ case 1:
+ break;
+ default:
+ igb_validate_option(&adapter->rss_queues, &opt, adapter);
+ if (adapter->rss_queues)
+ break;
+ case 0:
+ adapter->rss_queues = min_t(u32, opt.arg.r.max, num_online_cpus());
+ break;
+ }
+#ifdef module_param_array
+ } else {
+ adapter->rss_queues = opt.def;
+ }
+#endif
+ }
+ { /* QueuePairs - Enable Tx/Rx queue pairs for interrupt handling */
+ struct igb_option opt = {
+ .type = enable_option,
+ .name = "QueuePairs - Tx/Rx queue pairs for interrupt handling",
+ .err = "defaulting to Enabled",
+ .def = OPTION_ENABLED
+ };
+#ifdef module_param_array
+ if (num_QueuePairs > bd) {
+#endif
+ unsigned int qp = QueuePairs[bd];
+ /*
+ * We must enable queue pairs if the number of queues
+ * exceeds the number of available interrupts. We are
+ * limited to 10, or 3 per unallocated vf. On I210 and
+ * I211 devices, we are limited to 5 interrupts.
+ * However, since I211 only supports 2 queues, we do not
+ * need to check and override the user option.
+ */
+ if (qp == OPTION_DISABLED) {
+ if (adapter->rss_queues > 4)
+ qp = OPTION_ENABLED;
+
+ if (adapter->vmdq_pools > 4)
+ qp = OPTION_ENABLED;
+
+ if (adapter->rss_queues > 1 &&
+ (adapter->vmdq_pools > 3 ||
+ adapter->vfs_allocated_count > 6))
+ qp = OPTION_ENABLED;
+
+ if (hw->mac.type == e1000_i210 &&
+ adapter->rss_queues > 2)
+ qp = OPTION_ENABLED;
+
+ if (qp == OPTION_ENABLED)
+ DPRINTK(PROBE, INFO, "Number of queues exceeds available interrupts, %s\n",
+ opt.err);
+ }
+ igb_validate_option(&qp, &opt, adapter);
+ adapter->flags |= qp ? IGB_FLAG_QUEUE_PAIRS : 0;
+#ifdef module_param_array
+ } else {
+ adapter->flags |= opt.def ? IGB_FLAG_QUEUE_PAIRS : 0;
+ }
+#endif
+ }
+ { /* EEE - Enable EEE for capable adapters */
+
+ if (hw->mac.type >= e1000_i350) {
+ struct igb_option opt = {
+ .type = enable_option,
+ .name = "EEE Support",
+ .err = "defaulting to Enabled",
+ .def = OPTION_ENABLED
+ };
+#ifdef module_param_array
+ if (num_EEE > bd) {
+#endif
+ unsigned int eee = EEE[bd];
+ igb_validate_option(&eee, &opt, adapter);
+ adapter->flags |= eee ? IGB_FLAG_EEE : 0;
+ if (eee)
+ hw->dev_spec._82575.eee_disable = false;
+ else
+ hw->dev_spec._82575.eee_disable = true;
+
+#ifdef module_param_array
+ } else {
+ adapter->flags |= opt.def ? IGB_FLAG_EEE : 0;
+ if (adapter->flags & IGB_FLAG_EEE)
+ hw->dev_spec._82575.eee_disable = false;
+ else
+ hw->dev_spec._82575.eee_disable = true;
+ }
+#endif
+ }
+ }
+ { /* DMAC - Enable DMA Coalescing for capable adapters */
+
+ if (hw->mac.type >= e1000_i350) {
+ struct igb_opt_list list [] = {
+ { IGB_DMAC_DISABLE, "DMAC Disable"},
+ { IGB_DMAC_MIN, "DMAC 250 usec"},
+ { IGB_DMAC_500, "DMAC 500 usec"},
+ { IGB_DMAC_EN_DEFAULT, "DMAC 1000 usec"},
+ { IGB_DMAC_2000, "DMAC 2000 usec"},
+ { IGB_DMAC_3000, "DMAC 3000 usec"},
+ { IGB_DMAC_4000, "DMAC 4000 usec"},
+ { IGB_DMAC_5000, "DMAC 5000 usec"},
+ { IGB_DMAC_6000, "DMAC 6000 usec"},
+ { IGB_DMAC_7000, "DMAC 7000 usec"},
+ { IGB_DMAC_8000, "DMAC 8000 usec"},
+ { IGB_DMAC_9000, "DMAC 9000 usec"},
+ { IGB_DMAC_MAX, "DMAC 10000 usec"}
+ };
+ struct igb_option opt = {
+ .type = list_option,
+ .name = "DMA Coalescing",
+ .err = "using default of "__MODULE_STRING(IGB_DMAC_DISABLE),
+ .def = IGB_DMAC_DISABLE,
+ .arg = { .l = { .nr = 13,
+ .p = list
+ }
+ }
+ };
+#ifdef module_param_array
+ if (num_DMAC > bd) {
+#endif
+ unsigned int dmac = DMAC[bd];
+ if (adapter->rx_itr_setting == IGB_DMAC_DISABLE)
+ dmac = IGB_DMAC_DISABLE;
+ igb_validate_option(&dmac, &opt, adapter);
+ switch (dmac) {
+ case IGB_DMAC_DISABLE:
+ adapter->dmac = dmac;
+ break;
+ case IGB_DMAC_MIN:
+ adapter->dmac = dmac;
+ break;
+ case IGB_DMAC_500:
+ adapter->dmac = dmac;
+ break;
+ case IGB_DMAC_EN_DEFAULT:
+ adapter->dmac = dmac;
+ break;
+ case IGB_DMAC_2000:
+ adapter->dmac = dmac;
+ break;
+ case IGB_DMAC_3000:
+ adapter->dmac = dmac;
+ break;
+ case IGB_DMAC_4000:
+ adapter->dmac = dmac;
+ break;
+ case IGB_DMAC_5000:
+ adapter->dmac = dmac;
+ break;
+ case IGB_DMAC_6000:
+ adapter->dmac = dmac;
+ break;
+ case IGB_DMAC_7000:
+ adapter->dmac = dmac;
+ break;
+ case IGB_DMAC_8000:
+ adapter->dmac = dmac;
+ break;
+ case IGB_DMAC_9000:
+ adapter->dmac = dmac;
+ break;
+ case IGB_DMAC_MAX:
+ adapter->dmac = dmac;
+ break;
+ default:
+ adapter->dmac = opt.def;
+ DPRINTK(PROBE, INFO,
+ "Invalid DMAC setting, "
+ "resetting DMAC to %d\n", opt.def);
+ }
+#ifdef module_param_array
+ } else
+ adapter->dmac = opt.def;
+#endif
+ }
+ }
+#ifndef IGB_NO_LRO
+ { /* LRO - Enable Large Receive Offload */
+ struct igb_option opt = {
+ .type = enable_option,
+ .name = "LRO - Large Receive Offload",
+ .err = "defaulting to Disabled",
+ .def = OPTION_DISABLED
+ };
+ struct net_device *netdev = adapter->netdev;
+#ifdef module_param_array
+ if (num_LRO > bd) {
+#endif
+ unsigned int lro = LRO[bd];
+ igb_validate_option(&lro, &opt, adapter);
+ netdev->features |= lro ? NETIF_F_LRO : 0;
+#ifdef module_param_array
+ } else if (opt.def == OPTION_ENABLED) {
+ netdev->features |= NETIF_F_LRO;
+ }
+#endif
+ }
+#endif /* IGB_NO_LRO */
+ { /* MDD - Enable Malicious Driver Detection. Only available when
+ SR-IOV is enabled. */
+ struct igb_option opt = {
+ .type = enable_option,
+ .name = "Malicious Driver Detection",
+ .err = "defaulting to 1",
+ .def = OPTION_ENABLED,
+ .arg = { .r = { .min = OPTION_DISABLED,
+ .max = OPTION_ENABLED } }
+ };
+
+#ifdef module_param_array
+ if (num_MDD > bd) {
+#endif
+ adapter->mdd = MDD[bd];
+ igb_validate_option((uint *)&adapter->mdd, &opt,
+ adapter);
+#ifdef module_param_array
+ } else {
+ adapter->mdd = opt.def;
+ }
+#endif
+ }
+}
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_procfs.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_procfs.c
new file mode 100644
index 00000000..66236d29
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_procfs.c
@@ -0,0 +1,363 @@
+/*******************************************************************************
+
+ Intel(R) Gigabit Ethernet Linux driver
+ Copyright(c) 2007-2013 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#include "igb.h"
+#include "e1000_82575.h"
+#include "e1000_hw.h"
+
+#ifdef IGB_PROCFS
+#ifndef IGB_HWMON
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/proc_fs.h>
+#include <linux/device.h>
+#include <linux/netdevice.h>
+
+static struct proc_dir_entry *igb_top_dir = NULL;
+
+
+bool igb_thermal_present(struct igb_adapter *adapter)
+{
+ s32 status;
+ struct e1000_hw *hw;
+
+ if (adapter == NULL)
+ return false;
+ hw = &adapter->hw;
+
+ /*
+ * Only set I2C bit-bang mode if an external thermal sensor is
+ * supported on this device.
+ */
+ if (adapter->ets) {
+ status = e1000_set_i2c_bb(hw);
+ if (status != E1000_SUCCESS)
+ return false;
+ }
+
+ status = hw->mac.ops.init_thermal_sensor_thresh(hw);
+ if (status != E1000_SUCCESS)
+ return false;
+
+ return true;
+}
+
+
+static int igb_macburn(char *page, char **start, off_t off, int count,
+ int *eof, void *data)
+{
+ struct e1000_hw *hw;
+ struct igb_adapter *adapter = (struct igb_adapter *)data;
+ if (adapter == NULL)
+ return snprintf(page, count, "error: no adapter\n");
+
+ hw = &adapter->hw;
+ if (hw == NULL)
+ return snprintf(page, count, "error: no hw data\n");
+
+ return snprintf(page, count, "0x%02X%02X%02X%02X%02X%02X\n",
+ (unsigned int)hw->mac.perm_addr[0],
+ (unsigned int)hw->mac.perm_addr[1],
+ (unsigned int)hw->mac.perm_addr[2],
+ (unsigned int)hw->mac.perm_addr[3],
+ (unsigned int)hw->mac.perm_addr[4],
+ (unsigned int)hw->mac.perm_addr[5]);
+}
+
+static int igb_macadmn(char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+{
+ struct e1000_hw *hw;
+ struct igb_adapter *adapter = (struct igb_adapter *)data;
+ if (adapter == NULL)
+ return snprintf(page, count, "error: no adapter\n");
+
+ hw = &adapter->hw;
+ if (hw == NULL)
+ return snprintf(page, count, "error: no hw data\n");
+
+ return snprintf(page, count, "0x%02X%02X%02X%02X%02X%02X\n",
+ (unsigned int)hw->mac.addr[0],
+ (unsigned int)hw->mac.addr[1],
+ (unsigned int)hw->mac.addr[2],
+ (unsigned int)hw->mac.addr[3],
+ (unsigned int)hw->mac.addr[4],
+ (unsigned int)hw->mac.addr[5]);
+}
+
+static int igb_numeports(char *page, char **start, off_t off, int count,
+ int *eof, void *data)
+{
+ struct e1000_hw *hw;
+ int ports;
+ struct igb_adapter *adapter = (struct igb_adapter *)data;
+ if (adapter == NULL)
+ return snprintf(page, count, "error: no adapter\n");
+
+ hw = &adapter->hw;
+ if (hw == NULL)
+ return snprintf(page, count, "error: no hw data\n");
+
+ ports = 4;
+
+ return snprintf(page, count, "%d\n", ports);
+}
+
+static int igb_porttype(char *page, char **start, off_t off, int count,
+ int *eof, void *data)
+{
+ struct igb_adapter *adapter = (struct igb_adapter *)data;
+ if (adapter == NULL)
+ return snprintf(page, count, "error: no adapter\n");
+
+ return snprintf(page, count, "%d\n",
+ test_bit(__IGB_DOWN, &adapter->state));
+}
+
+static int igb_therm_location(char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+{
+ struct igb_therm_proc_data *therm_data =
+ (struct igb_therm_proc_data *)data;
+
+ if (therm_data == NULL)
+ return snprintf(page, count, "error: no therm_data\n");
+
+ return snprintf(page, count, "%d\n", therm_data->sensor_data->location);
+}
+
+static int igb_therm_maxopthresh(char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+{
+ struct igb_therm_proc_data *therm_data =
+ (struct igb_therm_proc_data *)data;
+
+ if (therm_data == NULL)
+ return snprintf(page, count, "error: no therm_data\n");
+
+ return snprintf(page, count, "%d\n",
+ therm_data->sensor_data->max_op_thresh);
+}
+
+static int igb_therm_cautionthresh(char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+{
+ struct igb_therm_proc_data *therm_data =
+ (struct igb_therm_proc_data *)data;
+
+ if (therm_data == NULL)
+ return snprintf(page, count, "error: no therm_data\n");
+
+ return snprintf(page, count, "%d\n",
+ therm_data->sensor_data->caution_thresh);
+}
+
+static int igb_therm_temp(char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+{
+ s32 status;
+ struct igb_therm_proc_data *therm_data =
+ (struct igb_therm_proc_data *)data;
+
+ if (therm_data == NULL)
+ return snprintf(page, count, "error: no therm_data\n");
+
+ status = e1000_get_thermal_sensor_data(therm_data->hw);
+ if (status != E1000_SUCCESS)
+ snprintf(page, count, "error: status %d returned\n", status);
+
+ return snprintf(page, count, "%d\n", therm_data->sensor_data->temp);
+}
+
+struct igb_proc_type{
+ char name[32];
+ int (*read)(char*, char**, off_t, int, int*, void*);
+};
+
+struct igb_proc_type igb_proc_entries[] = {
+ {"numeports", &igb_numeports},
+ {"porttype", &igb_porttype},
+ {"macburn", &igb_macburn},
+ {"macadmn", &igb_macadmn},
+ {"", NULL}
+};
+
+struct igb_proc_type igb_internal_entries[] = {
+ {"location", &igb_therm_location},
+ {"temp", &igb_therm_temp},
+ {"cautionthresh", &igb_therm_cautionthresh},
+ {"maxopthresh", &igb_therm_maxopthresh},
+ {"", NULL}
+};
+
+void igb_del_proc_entries(struct igb_adapter *adapter)
+{
+ int index, i;
+ char buf[16]; /* much larger than the sensor number will ever be */
+
+ if (igb_top_dir == NULL)
+ return;
+
+ for (i = 0; i < E1000_MAX_SENSORS; i++) {
+ if (adapter->therm_dir[i] == NULL)
+ continue;
+
+ for (index = 0; ; index++) {
+ if (igb_internal_entries[index].read == NULL)
+ break;
+
+ remove_proc_entry(igb_internal_entries[index].name,
+ adapter->therm_dir[i]);
+ }
+ snprintf(buf, sizeof(buf), "sensor_%d", i);
+ remove_proc_entry(buf, adapter->info_dir);
+ }
+
+ if (adapter->info_dir != NULL) {
+ for (index = 0; ; index++) {
+ if (igb_proc_entries[index].read == NULL)
+ break;
+ remove_proc_entry(igb_proc_entries[index].name,
+ adapter->info_dir);
+ }
+ remove_proc_entry("info", adapter->eth_dir);
+ }
+
+ if (adapter->eth_dir != NULL)
+ remove_proc_entry(pci_name(adapter->pdev), igb_top_dir);
+}
+
+/* called from igb_main.c */
+void igb_procfs_exit(struct igb_adapter *adapter)
+{
+ igb_del_proc_entries(adapter);
+}
+
+int igb_procfs_topdir_init(void)
+{
+ igb_top_dir = proc_mkdir("driver/igb", NULL);
+ if (igb_top_dir == NULL)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void igb_procfs_topdir_exit(void)
+{
+ remove_proc_entry("driver/igb", NULL);
+}
+
+/* called from igb_main.c */
+int igb_procfs_init(struct igb_adapter *adapter)
+{
+ int rc = 0;
+ int i;
+ int index;
+ char buf[16]; /* much larger than the sensor number will ever be */
+
+ adapter->eth_dir = NULL;
+ adapter->info_dir = NULL;
+ for (i = 0; i < E1000_MAX_SENSORS; i++)
+ adapter->therm_dir[i] = NULL;
+
+ if ( igb_top_dir == NULL ) {
+ rc = -ENOMEM;
+ goto fail;
+ }
+
+ adapter->eth_dir = proc_mkdir(pci_name(adapter->pdev), igb_top_dir);
+ if (adapter->eth_dir == NULL) {
+ rc = -ENOMEM;
+ goto fail;
+ }
+
+ adapter->info_dir = proc_mkdir("info", adapter->eth_dir);
+ if (adapter->info_dir == NULL) {
+ rc = -ENOMEM;
+ goto fail;
+ }
+ for (index = 0; ; index++) {
+ if (igb_proc_entries[index].read == NULL) {
+ break;
+ }
+ if (!(create_proc_read_entry(igb_proc_entries[index].name,
+ 0444,
+ adapter->info_dir,
+ igb_proc_entries[index].read,
+ adapter))) {
+
+ rc = -ENOMEM;
+ goto fail;
+ }
+ }
+ if (igb_thermal_present(adapter) == false)
+ goto exit;
+
+ for (i = 0; i < E1000_MAX_SENSORS; i++) {
+
+ if (adapter->hw.mac.thermal_sensor_data.sensor[i].location== 0)
+ continue;
+
+ snprintf(buf, sizeof(buf), "sensor_%d", i);
+ adapter->therm_dir[i] = proc_mkdir(buf, adapter->info_dir);
+ if (adapter->therm_dir[i] == NULL) {
+ rc = -ENOMEM;
+ goto fail;
+ }
+ for (index = 0; ; index++) {
+ if (igb_internal_entries[index].read == NULL)
+ break;
+ /*
+ * therm_data struct contains pointer the read func
+ * will be needing
+ */
+ adapter->therm_data[i].hw = &adapter->hw;
+ adapter->therm_data[i].sensor_data =
+ &adapter->hw.mac.thermal_sensor_data.sensor[i];
+
+ if (!(create_proc_read_entry(
+ igb_internal_entries[index].name,
+ 0444,
+ adapter->therm_dir[i],
+ igb_internal_entries[index].read,
+ &adapter->therm_data[i]))) {
+ rc = -ENOMEM;
+ goto fail;
+ }
+ }
+ }
+ goto exit;
+
+fail:
+ igb_del_proc_entries(adapter);
+exit:
+ return rc;
+}
+
+#endif /* !IGB_HWMON */
+#endif /* IGB_PROCFS */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ptp.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ptp.c
new file mode 100644
index 00000000..454b70ce
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ptp.c
@@ -0,0 +1,944 @@
+/*******************************************************************************
+
+ Intel(R) Gigabit Ethernet Linux driver
+ Copyright(c) 2007-2013 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+/******************************************************************************
+ Copyright(c) 2011 Richard Cochran <richardcochran@gmail.com> for some of the
+ 82576 and 82580 code
+******************************************************************************/
+
+#include "igb.h"
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/pci.h>
+#include <linux/ptp_classify.h>
+
+#define INCVALUE_MASK 0x7fffffff
+#define ISGN 0x80000000
+
+/*
+ * The 82580 timesync updates the system timer every 8ns by 8ns,
+ * and this update value cannot be reprogrammed.
+ *
+ * Neither the 82576 nor the 82580 offer registers wide enough to hold
+ * nanoseconds time values for very long. For the 82580, SYSTIM always
+ * counts nanoseconds, but the upper 24 bits are not available. The
+ * frequency is adjusted by changing the 32 bit fractional nanoseconds
+ * register, TIMINCA.
+ *
+ * For the 82576, the SYSTIM register time unit is affect by the
+ * choice of the 24 bit TININCA:IV (incvalue) field. Five bits of this
+ * field are needed to provide the nominal 16 nanosecond period,
+ * leaving 19 bits for fractional nanoseconds.
+ *
+ * We scale the NIC clock cycle by a large factor so that relatively
+ * small clock corrections can be added or subtracted at each clock
+ * tick. The drawbacks of a large factor are a) that the clock
+ * register overflows more quickly (not such a big deal) and b) that
+ * the increment per tick has to fit into 24 bits. As a result we
+ * need to use a shift of 19 so we can fit a value of 16 into the
+ * TIMINCA register.
+ *
+ *
+ * SYSTIMH SYSTIML
+ * +--------------+ +---+---+------+
+ * 82576 | 32 | | 8 | 5 | 19 |
+ * +--------------+ +---+---+------+
+ * \________ 45 bits _______/ fract
+ *
+ * +----------+---+ +--------------+
+ * 82580 | 24 | 8 | | 32 |
+ * +----------+---+ +--------------+
+ * reserved \______ 40 bits _____/
+ *
+ *
+ * The 45 bit 82576 SYSTIM overflows every
+ * 2^45 * 10^-9 / 3600 = 9.77 hours.
+ *
+ * The 40 bit 82580 SYSTIM overflows every
+ * 2^40 * 10^-9 / 60 = 18.3 minutes.
+ */
+
+#define IGB_SYSTIM_OVERFLOW_PERIOD (HZ * 60 * 9)
+#define IGB_PTP_TX_TIMEOUT (HZ * 15)
+#define INCPERIOD_82576 (1 << E1000_TIMINCA_16NS_SHIFT)
+#define INCVALUE_82576_MASK ((1 << E1000_TIMINCA_16NS_SHIFT) - 1)
+#define INCVALUE_82576 (16 << IGB_82576_TSYNC_SHIFT)
+#define IGB_NBITS_82580 40
+
+/*
+ * SYSTIM read access for the 82576
+ */
+
+static cycle_t igb_ptp_read_82576(const struct cyclecounter *cc)
+{
+ struct igb_adapter *igb = container_of(cc, struct igb_adapter, cc);
+ struct e1000_hw *hw = &igb->hw;
+ u64 val;
+ u32 lo, hi;
+
+ lo = E1000_READ_REG(hw, E1000_SYSTIML);
+ hi = E1000_READ_REG(hw, E1000_SYSTIMH);
+
+ val = ((u64) hi) << 32;
+ val |= lo;
+
+ return val;
+}
+
+/*
+ * SYSTIM read access for the 82580
+ */
+
+static cycle_t igb_ptp_read_82580(const struct cyclecounter *cc)
+{
+ struct igb_adapter *igb = container_of(cc, struct igb_adapter, cc);
+ struct e1000_hw *hw = &igb->hw;
+ u64 val;
+ u32 lo, hi;
+
+ /* The timestamp latches on lowest register read. For the 82580
+ * the lowest register is SYSTIMR instead of SYSTIML. However we only
+ * need to provide nanosecond resolution, so we just ignore it.
+ */
+ E1000_READ_REG(hw, E1000_SYSTIMR);
+ lo = E1000_READ_REG(hw, E1000_SYSTIML);
+ hi = E1000_READ_REG(hw, E1000_SYSTIMH);
+
+ val = ((u64) hi) << 32;
+ val |= lo;
+
+ return val;
+}
+
+/*
+ * SYSTIM read access for I210/I211
+ */
+
+static void igb_ptp_read_i210(struct igb_adapter *adapter, struct timespec *ts)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ u32 sec, nsec;
+
+ /* The timestamp latches on lowest register read. For I210/I211, the
+ * lowest register is SYSTIMR. Since we only need to provide nanosecond
+ * resolution, we can ignore it.
+ */
+ E1000_READ_REG(hw, E1000_SYSTIMR);
+ nsec = E1000_READ_REG(hw, E1000_SYSTIML);
+ sec = E1000_READ_REG(hw, E1000_SYSTIMH);
+
+ ts->tv_sec = sec;
+ ts->tv_nsec = nsec;
+}
+
+static void igb_ptp_write_i210(struct igb_adapter *adapter,
+ const struct timespec *ts)
+{
+ struct e1000_hw *hw = &adapter->hw;
+
+ /*
+ * Writing the SYSTIMR register is not necessary as it only provides
+ * sub-nanosecond resolution.
+ */
+ E1000_WRITE_REG(hw, E1000_SYSTIML, ts->tv_nsec);
+ E1000_WRITE_REG(hw, E1000_SYSTIMH, ts->tv_sec);
+}
+
+/**
+ * igb_ptp_systim_to_hwtstamp - convert system time value to hw timestamp
+ * @adapter: board private structure
+ * @hwtstamps: timestamp structure to update
+ * @systim: unsigned 64bit system time value.
+ *
+ * We need to convert the system time value stored in the RX/TXSTMP registers
+ * into a hwtstamp which can be used by the upper level timestamping functions.
+ *
+ * The 'tmreg_lock' spinlock is used to protect the consistency of the
+ * system time value. This is needed because reading the 64 bit time
+ * value involves reading two (or three) 32 bit registers. The first
+ * read latches the value. Ditto for writing.
+ *
+ * In addition, here have extended the system time with an overflow
+ * counter in software.
+ **/
+static void igb_ptp_systim_to_hwtstamp(struct igb_adapter *adapter,
+ struct skb_shared_hwtstamps *hwtstamps,
+ u64 systim)
+{
+ unsigned long flags;
+ u64 ns;
+
+ switch (adapter->hw.mac.type) {
+ case e1000_82576:
+ case e1000_82580:
+ case e1000_i350:
+ case e1000_i354:
+ spin_lock_irqsave(&adapter->tmreg_lock, flags);
+
+ ns = timecounter_cyc2time(&adapter->tc, systim);
+
+ spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
+
+ memset(hwtstamps, 0, sizeof(*hwtstamps));
+ hwtstamps->hwtstamp = ns_to_ktime(ns);
+ break;
+ case e1000_i210:
+ case e1000_i211:
+ memset(hwtstamps, 0, sizeof(*hwtstamps));
+ /* Upper 32 bits contain s, lower 32 bits contain ns. */
+ hwtstamps->hwtstamp = ktime_set(systim >> 32,
+ systim & 0xFFFFFFFF);
+ break;
+ default:
+ break;
+ }
+}
+
+/*
+ * PTP clock operations
+ */
+
+static int igb_ptp_adjfreq_82576(struct ptp_clock_info *ptp, s32 ppb)
+{
+ struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
+ ptp_caps);
+ struct e1000_hw *hw = &igb->hw;
+ int neg_adj = 0;
+ u64 rate;
+ u32 incvalue;
+
+ if (ppb < 0) {
+ neg_adj = 1;
+ ppb = -ppb;
+ }
+ rate = ppb;
+ rate <<= 14;
+ rate = div_u64(rate, 1953125);
+
+ incvalue = 16 << IGB_82576_TSYNC_SHIFT;
+
+ if (neg_adj)
+ incvalue -= rate;
+ else
+ incvalue += rate;
+
+ E1000_WRITE_REG(hw, E1000_TIMINCA, INCPERIOD_82576 | (incvalue & INCVALUE_82576_MASK));
+
+ return 0;
+}
+
+static int igb_ptp_adjfreq_82580(struct ptp_clock_info *ptp, s32 ppb)
+{
+ struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
+ ptp_caps);
+ struct e1000_hw *hw = &igb->hw;
+ int neg_adj = 0;
+ u64 rate;
+ u32 inca;
+
+ if (ppb < 0) {
+ neg_adj = 1;
+ ppb = -ppb;
+ }
+ rate = ppb;
+ rate <<= 26;
+ rate = div_u64(rate, 1953125);
+
+ /* At 2.5G speeds, the TIMINCA register on I354 updates the clock 2.5x
+ * as quickly. Account for this by dividing the adjustment by 2.5.
+ */
+ if (hw->mac.type == e1000_i354) {
+ u32 status = E1000_READ_REG(hw, E1000_STATUS);
+
+ if ((status & E1000_STATUS_2P5_SKU) &&
+ !(status & E1000_STATUS_2P5_SKU_OVER)) {
+ rate <<= 1;
+ rate = div_u64(rate, 5);
+ }
+ }
+
+ inca = rate & INCVALUE_MASK;
+ if (neg_adj)
+ inca |= ISGN;
+
+ E1000_WRITE_REG(hw, E1000_TIMINCA, inca);
+
+ return 0;
+}
+
+static int igb_ptp_adjtime_82576(struct ptp_clock_info *ptp, s64 delta)
+{
+ struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
+ ptp_caps);
+ unsigned long flags;
+ s64 now;
+
+ spin_lock_irqsave(&igb->tmreg_lock, flags);
+
+ now = timecounter_read(&igb->tc);
+ now += delta;
+ timecounter_init(&igb->tc, &igb->cc, now);
+
+ spin_unlock_irqrestore(&igb->tmreg_lock, flags);
+
+ return 0;
+}
+
+static int igb_ptp_adjtime_i210(struct ptp_clock_info *ptp, s64 delta)
+{
+ struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
+ ptp_caps);
+ unsigned long flags;
+ struct timespec now, then = ns_to_timespec(delta);
+
+ spin_lock_irqsave(&igb->tmreg_lock, flags);
+
+ igb_ptp_read_i210(igb, &now);
+ now = timespec_add(now, then);
+ igb_ptp_write_i210(igb, (const struct timespec *)&now);
+
+ spin_unlock_irqrestore(&igb->tmreg_lock, flags);
+
+ return 0;
+}
+
+static int igb_ptp_gettime_82576(struct ptp_clock_info *ptp,
+ struct timespec *ts)
+{
+ struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
+ ptp_caps);
+ unsigned long flags;
+ u64 ns;
+ u32 remainder;
+
+ spin_lock_irqsave(&igb->tmreg_lock, flags);
+
+ ns = timecounter_read(&igb->tc);
+
+ spin_unlock_irqrestore(&igb->tmreg_lock, flags);
+
+ ts->tv_sec = div_u64_rem(ns, 1000000000, &remainder);
+ ts->tv_nsec = remainder;
+
+ return 0;
+}
+
+static int igb_ptp_gettime_i210(struct ptp_clock_info *ptp,
+ struct timespec *ts)
+{
+ struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
+ ptp_caps);
+ unsigned long flags;
+
+ spin_lock_irqsave(&igb->tmreg_lock, flags);
+
+ igb_ptp_read_i210(igb, ts);
+
+ spin_unlock_irqrestore(&igb->tmreg_lock, flags);
+
+ return 0;
+}
+
+static int igb_ptp_settime_82576(struct ptp_clock_info *ptp,
+ const struct timespec *ts)
+{
+ struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
+ ptp_caps);
+ unsigned long flags;
+ u64 ns;
+
+ ns = ts->tv_sec * 1000000000ULL;
+ ns += ts->tv_nsec;
+
+ spin_lock_irqsave(&igb->tmreg_lock, flags);
+
+ timecounter_init(&igb->tc, &igb->cc, ns);
+
+ spin_unlock_irqrestore(&igb->tmreg_lock, flags);
+
+ return 0;
+}
+
+static int igb_ptp_settime_i210(struct ptp_clock_info *ptp,
+ const struct timespec *ts)
+{
+ struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
+ ptp_caps);
+ unsigned long flags;
+
+ spin_lock_irqsave(&igb->tmreg_lock, flags);
+
+ igb_ptp_write_i210(igb, ts);
+
+ spin_unlock_irqrestore(&igb->tmreg_lock, flags);
+
+ return 0;
+}
+
+static int igb_ptp_enable(struct ptp_clock_info *ptp,
+ struct ptp_clock_request *rq, int on)
+{
+ return -EOPNOTSUPP;
+}
+
+/**
+ * igb_ptp_tx_work
+ * @work: pointer to work struct
+ *
+ * This work function polls the TSYNCTXCTL valid bit to determine when a
+ * timestamp has been taken for the current stored skb.
+ */
+void igb_ptp_tx_work(struct work_struct *work)
+{
+ struct igb_adapter *adapter = container_of(work, struct igb_adapter,
+ ptp_tx_work);
+ struct e1000_hw *hw = &adapter->hw;
+ u32 tsynctxctl;
+
+ if (!adapter->ptp_tx_skb)
+ return;
+
+ if (time_is_before_jiffies(adapter->ptp_tx_start +
+ IGB_PTP_TX_TIMEOUT)) {
+ dev_kfree_skb_any(adapter->ptp_tx_skb);
+ adapter->ptp_tx_skb = NULL;
+ adapter->tx_hwtstamp_timeouts++;
+ dev_warn(&adapter->pdev->dev, "clearing Tx timestamp hang");
+ return;
+ }
+
+ tsynctxctl = E1000_READ_REG(hw, E1000_TSYNCTXCTL);
+ if (tsynctxctl & E1000_TSYNCTXCTL_VALID)
+ igb_ptp_tx_hwtstamp(adapter);
+ else
+ /* reschedule to check later */
+ schedule_work(&adapter->ptp_tx_work);
+}
+
+static void igb_ptp_overflow_check(struct work_struct *work)
+{
+ struct igb_adapter *igb =
+ container_of(work, struct igb_adapter, ptp_overflow_work.work);
+ struct timespec ts;
+
+ igb->ptp_caps.gettime(&igb->ptp_caps, &ts);
+
+ pr_debug("igb overflow check at %ld.%09lu\n", ts.tv_sec, ts.tv_nsec);
+
+ schedule_delayed_work(&igb->ptp_overflow_work,
+ IGB_SYSTIM_OVERFLOW_PERIOD);
+}
+
+/**
+ * igb_ptp_rx_hang - detect error case when Rx timestamp registers latched
+ * @adapter: private network adapter structure
+ *
+ * This watchdog task is scheduled to detect error case where hardware has
+ * dropped an Rx packet that was timestamped when the ring is full. The
+ * particular error is rare but leaves the device in a state unable to timestamp
+ * any future packets.
+ */
+void igb_ptp_rx_hang(struct igb_adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ struct igb_ring *rx_ring;
+ u32 tsyncrxctl = E1000_READ_REG(hw, E1000_TSYNCRXCTL);
+ unsigned long rx_event;
+ int n;
+
+ if (hw->mac.type != e1000_82576)
+ return;
+
+ /* If we don't have a valid timestamp in the registers, just update the
+ * timeout counter and exit
+ */
+ if (!(tsyncrxctl & E1000_TSYNCRXCTL_VALID)) {
+ adapter->last_rx_ptp_check = jiffies;
+ return;
+ }
+
+ /* Determine the most recent watchdog or rx_timestamp event */
+ rx_event = adapter->last_rx_ptp_check;
+ for (n = 0; n < adapter->num_rx_queues; n++) {
+ rx_ring = adapter->rx_ring[n];
+ if (time_after(rx_ring->last_rx_timestamp, rx_event))
+ rx_event = rx_ring->last_rx_timestamp;
+ }
+
+ /* Only need to read the high RXSTMP register to clear the lock */
+ if (time_is_before_jiffies(rx_event + 5 * HZ)) {
+ E1000_READ_REG(hw, E1000_RXSTMPH);
+ adapter->last_rx_ptp_check = jiffies;
+ adapter->rx_hwtstamp_cleared++;
+ dev_warn(&adapter->pdev->dev, "clearing Rx timestamp hang");
+ }
+}
+
+/**
+ * igb_ptp_tx_hwtstamp - utility function which checks for TX time stamp
+ * @adapter: Board private structure.
+ *
+ * If we were asked to do hardware stamping and such a time stamp is
+ * available, then it must have been for this skb here because we only
+ * allow only one such packet into the queue.
+ */
+void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ struct skb_shared_hwtstamps shhwtstamps;
+ u64 regval;
+
+ regval = E1000_READ_REG(hw, E1000_TXSTMPL);
+ regval |= (u64)E1000_READ_REG(hw, E1000_TXSTMPH) << 32;
+
+ igb_ptp_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
+ skb_tstamp_tx(adapter->ptp_tx_skb, &shhwtstamps);
+ dev_kfree_skb_any(adapter->ptp_tx_skb);
+ adapter->ptp_tx_skb = NULL;
+}
+
+/**
+ * igb_ptp_rx_pktstamp - retrieve Rx per packet timestamp
+ * @q_vector: Pointer to interrupt specific structure
+ * @va: Pointer to address containing Rx buffer
+ * @skb: Buffer containing timestamp and packet
+ *
+ * This function is meant to retrieve a timestamp from the first buffer of an
+ * incoming frame. The value is stored in little endian format starting on
+ * byte 8.
+ */
+void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector,
+ unsigned char *va,
+ struct sk_buff *skb)
+{
+ __le64 *regval = (__le64 *)va;
+
+ /*
+ * The timestamp is recorded in little endian format.
+ * DWORD: 0 1 2 3
+ * Field: Reserved Reserved SYSTIML SYSTIMH
+ */
+ igb_ptp_systim_to_hwtstamp(q_vector->adapter, skb_hwtstamps(skb),
+ le64_to_cpu(regval[1]));
+}
+
+/**
+ * igb_ptp_rx_rgtstamp - retrieve Rx timestamp stored in register
+ * @q_vector: Pointer to interrupt specific structure
+ * @skb: Buffer containing timestamp and packet
+ *
+ * This function is meant to retrieve a timestamp from the internal registers
+ * of the adapter and store it in the skb.
+ */
+void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector,
+ struct sk_buff *skb)
+{
+ struct igb_adapter *adapter = q_vector->adapter;
+ struct e1000_hw *hw = &adapter->hw;
+ u64 regval;
+
+ /*
+ * If this bit is set, then the RX registers contain the time stamp. No
+ * other packet will be time stamped until we read these registers, so
+ * read the registers to make them available again. Because only one
+ * packet can be time stamped at a time, we know that the register
+ * values must belong to this one here and therefore we don't need to
+ * compare any of the additional attributes stored for it.
+ *
+ * If nothing went wrong, then it should have a shared tx_flags that we
+ * can turn into a skb_shared_hwtstamps.
+ */
+ if (!(E1000_READ_REG(hw, E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
+ return;
+
+ regval = E1000_READ_REG(hw, E1000_RXSTMPL);
+ regval |= (u64)E1000_READ_REG(hw, E1000_RXSTMPH) << 32;
+
+ igb_ptp_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
+}
+
+/**
+ * igb_ptp_hwtstamp_ioctl - control hardware time stamping
+ * @netdev:
+ * @ifreq:
+ * @cmd:
+ *
+ * Outgoing time stamping can be enabled and disabled. Play nice and
+ * disable it when requested, although it shouldn't case any overhead
+ * when no packet needs it. At most one packet in the queue may be
+ * marked for time stamping, otherwise it would be impossible to tell
+ * for sure to which packet the hardware time stamp belongs.
+ *
+ * Incoming time stamping has to be configured via the hardware
+ * filters. Not all combinations are supported, in particular event
+ * type has to be specified. Matching the kind of event packet is
+ * not supported, with the exception of "all V2 events regardless of
+ * level 2 or 4".
+ *
+ **/
+int igb_ptp_hwtstamp_ioctl(struct net_device *netdev,
+ struct ifreq *ifr, int cmd)
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+ struct e1000_hw *hw = &adapter->hw;
+ struct hwtstamp_config config;
+ u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
+ u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
+ u32 tsync_rx_cfg = 0;
+ bool is_l4 = false;
+ bool is_l2 = false;
+ u32 regval;
+
+ if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
+ return -EFAULT;
+
+ /* reserved for future extensions */
+ if (config.flags)
+ return -EINVAL;
+
+ switch (config.tx_type) {
+ case HWTSTAMP_TX_OFF:
+ tsync_tx_ctl = 0;
+ case HWTSTAMP_TX_ON:
+ break;
+ default:
+ return -ERANGE;
+ }
+
+ switch (config.rx_filter) {
+ case HWTSTAMP_FILTER_NONE:
+ tsync_rx_ctl = 0;
+ break;
+ case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+ tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
+ tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
+ is_l4 = true;
+ break;
+ case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+ tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
+ tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
+ is_l4 = true;
+ break;
+ case HWTSTAMP_FILTER_PTP_V2_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+ tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
+ config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
+ is_l2 = true;
+ is_l4 = true;
+ break;
+ case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+ case HWTSTAMP_FILTER_ALL:
+ /*
+ * 82576 cannot timestamp all packets, which it needs to do to
+ * support both V1 Sync and Delay_Req messages
+ */
+ if (hw->mac.type != e1000_82576) {
+ tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
+ config.rx_filter = HWTSTAMP_FILTER_ALL;
+ break;
+ }
+ /* fall through */
+ default:
+ config.rx_filter = HWTSTAMP_FILTER_NONE;
+ return -ERANGE;
+ }
+
+ if (hw->mac.type == e1000_82575) {
+ if (tsync_rx_ctl | tsync_tx_ctl)
+ return -EINVAL;
+ return 0;
+ }
+
+ /*
+ * Per-packet timestamping only works if all packets are
+ * timestamped, so enable timestamping in all packets as
+ * long as one rx filter was configured.
+ */
+ if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) {
+ tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
+ tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
+ config.rx_filter = HWTSTAMP_FILTER_ALL;
+ is_l2 = true;
+ is_l4 = true;
+
+ if ((hw->mac.type == e1000_i210) ||
+ (hw->mac.type == e1000_i211)) {
+ regval = E1000_READ_REG(hw, E1000_RXPBS);
+ regval |= E1000_RXPBS_CFG_TS_EN;
+ E1000_WRITE_REG(hw, E1000_RXPBS, regval);
+ }
+ }
+
+ /* enable/disable TX */
+ regval = E1000_READ_REG(hw, E1000_TSYNCTXCTL);
+ regval &= ~E1000_TSYNCTXCTL_ENABLED;
+ regval |= tsync_tx_ctl;
+ E1000_WRITE_REG(hw, E1000_TSYNCTXCTL, regval);
+
+ /* enable/disable RX */
+ regval = E1000_READ_REG(hw, E1000_TSYNCRXCTL);
+ regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
+ regval |= tsync_rx_ctl;
+ E1000_WRITE_REG(hw, E1000_TSYNCRXCTL, regval);
+
+ /* define which PTP packets are time stamped */
+ E1000_WRITE_REG(hw, E1000_TSYNCRXCFG, tsync_rx_cfg);
+
+ /* define ethertype filter for timestamped packets */
+ if (is_l2)
+ E1000_WRITE_REG(hw, E1000_ETQF(3),
+ (E1000_ETQF_FILTER_ENABLE | /* enable filter */
+ E1000_ETQF_1588 | /* enable timestamping */
+ ETH_P_1588)); /* 1588 eth protocol type */
+ else
+ E1000_WRITE_REG(hw, E1000_ETQF(3), 0);
+
+ /* L4 Queue Filter[3]: filter by destination port and protocol */
+ if (is_l4) {
+ u32 ftqf = (IPPROTO_UDP /* UDP */
+ | E1000_FTQF_VF_BP /* VF not compared */
+ | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
+ | E1000_FTQF_MASK); /* mask all inputs */
+ ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
+
+ E1000_WRITE_REG(hw, E1000_IMIR(3), htons(PTP_EV_PORT));
+ E1000_WRITE_REG(hw, E1000_IMIREXT(3),
+ (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
+ if (hw->mac.type == e1000_82576) {
+ /* enable source port check */
+ E1000_WRITE_REG(hw, E1000_SPQF(3), htons(PTP_EV_PORT));
+ ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
+ }
+ E1000_WRITE_REG(hw, E1000_FTQF(3), ftqf);
+ } else {
+ E1000_WRITE_REG(hw, E1000_FTQF(3), E1000_FTQF_MASK);
+ }
+ E1000_WRITE_FLUSH(hw);
+
+ /* clear TX/RX time stamp registers, just to be sure */
+ regval = E1000_READ_REG(hw, E1000_TXSTMPL);
+ regval = E1000_READ_REG(hw, E1000_TXSTMPH);
+ regval = E1000_READ_REG(hw, E1000_RXSTMPL);
+ regval = E1000_READ_REG(hw, E1000_RXSTMPH);
+
+ return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
+ -EFAULT : 0;
+}
+
+void igb_ptp_init(struct igb_adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ struct net_device *netdev = adapter->netdev;
+
+ switch (hw->mac.type) {
+ case e1000_82576:
+ snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr);
+ adapter->ptp_caps.owner = THIS_MODULE;
+ adapter->ptp_caps.max_adj = 999999881;
+ adapter->ptp_caps.n_ext_ts = 0;
+ adapter->ptp_caps.pps = 0;
+ adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82576;
+ adapter->ptp_caps.adjtime = igb_ptp_adjtime_82576;
+ adapter->ptp_caps.gettime = igb_ptp_gettime_82576;
+ adapter->ptp_caps.settime = igb_ptp_settime_82576;
+ adapter->ptp_caps.enable = igb_ptp_enable;
+ adapter->cc.read = igb_ptp_read_82576;
+ adapter->cc.mask = CLOCKSOURCE_MASK(64);
+ adapter->cc.mult = 1;
+ adapter->cc.shift = IGB_82576_TSYNC_SHIFT;
+ /* Dial the nominal frequency. */
+ E1000_WRITE_REG(hw, E1000_TIMINCA, INCPERIOD_82576 |
+ INCVALUE_82576);
+ break;
+ case e1000_82580:
+ case e1000_i350:
+ case e1000_i354:
+ snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr);
+ adapter->ptp_caps.owner = THIS_MODULE;
+ adapter->ptp_caps.max_adj = 62499999;
+ adapter->ptp_caps.n_ext_ts = 0;
+ adapter->ptp_caps.pps = 0;
+ adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82580;
+ adapter->ptp_caps.adjtime = igb_ptp_adjtime_82576;
+ adapter->ptp_caps.gettime = igb_ptp_gettime_82576;
+ adapter->ptp_caps.settime = igb_ptp_settime_82576;
+ adapter->ptp_caps.enable = igb_ptp_enable;
+ adapter->cc.read = igb_ptp_read_82580;
+ adapter->cc.mask = CLOCKSOURCE_MASK(IGB_NBITS_82580);
+ adapter->cc.mult = 1;
+ adapter->cc.shift = 0;
+ /* Enable the timer functions by clearing bit 31. */
+ E1000_WRITE_REG(hw, E1000_TSAUXC, 0x0);
+ break;
+ case e1000_i210:
+ case e1000_i211:
+ snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr);
+ adapter->ptp_caps.owner = THIS_MODULE;
+ adapter->ptp_caps.max_adj = 62499999;
+ adapter->ptp_caps.n_ext_ts = 0;
+ adapter->ptp_caps.pps = 0;
+ adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82580;
+ adapter->ptp_caps.adjtime = igb_ptp_adjtime_i210;
+ adapter->ptp_caps.gettime = igb_ptp_gettime_i210;
+ adapter->ptp_caps.settime = igb_ptp_settime_i210;
+ adapter->ptp_caps.enable = igb_ptp_enable;
+ /* Enable the timer functions by clearing bit 31. */
+ E1000_WRITE_REG(hw, E1000_TSAUXC, 0x0);
+ break;
+ default:
+ adapter->ptp_clock = NULL;
+ return;
+ }
+
+ E1000_WRITE_FLUSH(hw);
+
+ spin_lock_init(&adapter->tmreg_lock);
+ INIT_WORK(&adapter->ptp_tx_work, igb_ptp_tx_work);
+
+ /* Initialize the clock and overflow work for devices that need it. */
+ if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211)) {
+ struct timespec ts = ktime_to_timespec(ktime_get_real());
+
+ igb_ptp_settime_i210(&adapter->ptp_caps, &ts);
+ } else {
+ timecounter_init(&adapter->tc, &adapter->cc,
+ ktime_to_ns(ktime_get_real()));
+
+ INIT_DELAYED_WORK(&adapter->ptp_overflow_work,
+ igb_ptp_overflow_check);
+
+ schedule_delayed_work(&adapter->ptp_overflow_work,
+ IGB_SYSTIM_OVERFLOW_PERIOD);
+ }
+
+ /* Initialize the time sync interrupts for devices that support it. */
+ if (hw->mac.type >= e1000_82580) {
+ E1000_WRITE_REG(hw, E1000_TSIM, E1000_TSIM_TXTS);
+ E1000_WRITE_REG(hw, E1000_IMS, E1000_IMS_TS);
+ }
+
+ adapter->ptp_clock = ptp_clock_register(&adapter->ptp_caps,
+ &adapter->pdev->dev);
+ if (IS_ERR(adapter->ptp_clock)) {
+ adapter->ptp_clock = NULL;
+ dev_err(&adapter->pdev->dev, "ptp_clock_register failed\n");
+ } else {
+ dev_info(&adapter->pdev->dev, "added PHC on %s\n",
+ adapter->netdev->name);
+ adapter->flags |= IGB_FLAG_PTP;
+ }
+}
+
+/**
+ * igb_ptp_stop - Disable PTP device and stop the overflow check.
+ * @adapter: Board private structure.
+ *
+ * This function stops the PTP support and cancels the delayed work.
+ **/
+void igb_ptp_stop(struct igb_adapter *adapter)
+{
+ switch (adapter->hw.mac.type) {
+ case e1000_82576:
+ case e1000_82580:
+ case e1000_i350:
+ case e1000_i354:
+ cancel_delayed_work_sync(&adapter->ptp_overflow_work);
+ break;
+ case e1000_i210:
+ case e1000_i211:
+ /* No delayed work to cancel. */
+ break;
+ default:
+ return;
+ }
+
+ cancel_work_sync(&adapter->ptp_tx_work);
+ if (adapter->ptp_tx_skb) {
+ dev_kfree_skb_any(adapter->ptp_tx_skb);
+ adapter->ptp_tx_skb = NULL;
+ }
+
+ if (adapter->ptp_clock) {
+ ptp_clock_unregister(adapter->ptp_clock);
+ dev_info(&adapter->pdev->dev, "removed PHC on %s\n",
+ adapter->netdev->name);
+ adapter->flags &= ~IGB_FLAG_PTP;
+ }
+}
+
+/**
+ * igb_ptp_reset - Re-enable the adapter for PTP following a reset.
+ * @adapter: Board private structure.
+ *
+ * This function handles the reset work required to re-enable the PTP device.
+ **/
+void igb_ptp_reset(struct igb_adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+
+ if (!(adapter->flags & IGB_FLAG_PTP))
+ return;
+
+ switch (adapter->hw.mac.type) {
+ case e1000_82576:
+ /* Dial the nominal frequency. */
+ E1000_WRITE_REG(hw, E1000_TIMINCA, INCPERIOD_82576 |
+ INCVALUE_82576);
+ break;
+ case e1000_82580:
+ case e1000_i350:
+ case e1000_i354:
+ case e1000_i210:
+ case e1000_i211:
+ /* Enable the timer functions and interrupts. */
+ E1000_WRITE_REG(hw, E1000_TSAUXC, 0x0);
+ E1000_WRITE_REG(hw, E1000_TSIM, E1000_TSIM_TXTS);
+ E1000_WRITE_REG(hw, E1000_IMS, E1000_IMS_TS);
+ break;
+ default:
+ /* No work to do. */
+ return;
+ }
+
+ /* Re-initialize the timer. */
+ if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211)) {
+ struct timespec ts = ktime_to_timespec(ktime_get_real());
+
+ igb_ptp_settime_i210(&adapter->ptp_caps, &ts);
+ } else {
+ timecounter_init(&adapter->tc, &adapter->cc,
+ ktime_to_ns(ktime_get_real()));
+ }
+}
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_regtest.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_regtest.h
new file mode 100644
index 00000000..18da64a3
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_regtest.h
@@ -0,0 +1,249 @@
+/*******************************************************************************
+
+ Intel(R) Gigabit Ethernet Linux driver
+ Copyright(c) 2007-2013 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+/* ethtool register test data */
+struct igb_reg_test {
+ u16 reg;
+ u16 reg_offset;
+ u16 array_len;
+ u16 test_type;
+ u32 mask;
+ u32 write;
+};
+
+/* In the hardware, registers are laid out either singly, in arrays
+ * spaced 0x100 bytes apart, or in contiguous tables. We assume
+ * most tests take place on arrays or single registers (handled
+ * as a single-element array) and special-case the tables.
+ * Table tests are always pattern tests.
+ *
+ * We also make provision for some required setup steps by specifying
+ * registers to be written without any read-back testing.
+ */
+
+#define PATTERN_TEST 1
+#define SET_READ_TEST 2
+#define WRITE_NO_TEST 3
+#define TABLE32_TEST 4
+#define TABLE64_TEST_LO 5
+#define TABLE64_TEST_HI 6
+
+/* i210 reg test */
+static struct igb_reg_test reg_test_i210[] = {
+ { E1000_FCAL, 0x100, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+ { E1000_FCAH, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF },
+ { E1000_FCT, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF },
+ { E1000_RDBAL(0), 0x100, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+ { E1000_RDBAH(0), 0x100, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+ { E1000_RDLEN(0), 0x100, 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF },
+ /* RDH is read-only for i210, only test RDT. */
+ { E1000_RDT(0), 0x100, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+ { E1000_FCRTH, 0x100, 1, PATTERN_TEST, 0x0003FFF0, 0x0003FFF0 },
+ { E1000_FCTTV, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+ { E1000_TIPG, 0x100, 1, PATTERN_TEST, 0x3FFFFFFF, 0x3FFFFFFF },
+ { E1000_TDBAL(0), 0x100, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+ { E1000_TDBAH(0), 0x100, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+ { E1000_TDLEN(0), 0x100, 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF },
+ { E1000_TDT(0), 0x100, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+ { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 },
+ { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB0FE, 0x003FFFFB },
+ { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB0FE, 0xFFFFFFFF },
+ { E1000_TCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 },
+ { E1000_RA, 0, 16, TABLE64_TEST_LO,
+ 0xFFFFFFFF, 0xFFFFFFFF },
+ { E1000_RA, 0, 16, TABLE64_TEST_HI,
+ 0x900FFFFF, 0xFFFFFFFF },
+ { E1000_MTA, 0, 128, TABLE32_TEST,
+ 0xFFFFFFFF, 0xFFFFFFFF },
+ { 0, 0, 0, 0 }
+};
+
+/* i350 reg test */
+static struct igb_reg_test reg_test_i350[] = {
+ { E1000_FCAL, 0x100, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+ { E1000_FCAH, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF },
+ { E1000_FCT, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF },
+ /* VET is readonly on i350 */
+ { E1000_RDBAL(0), 0x100, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+ { E1000_RDBAH(0), 0x100, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+ { E1000_RDLEN(0), 0x100, 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF },
+ { E1000_RDBAL(4), 0x40, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+ { E1000_RDBAH(4), 0x40, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+ { E1000_RDLEN(4), 0x40, 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF },
+ /* RDH is read-only for i350, only test RDT. */
+ { E1000_RDT(0), 0x100, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+ { E1000_RDT(4), 0x40, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+ { E1000_FCRTH, 0x100, 1, PATTERN_TEST, 0x0000FFF0, 0x0000FFF0 },
+ { E1000_FCTTV, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+ { E1000_TIPG, 0x100, 1, PATTERN_TEST, 0x3FFFFFFF, 0x3FFFFFFF },
+ { E1000_TDBAL(0), 0x100, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+ { E1000_TDBAH(0), 0x100, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+ { E1000_TDLEN(0), 0x100, 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF },
+ { E1000_TDBAL(4), 0x40, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+ { E1000_TDBAH(4), 0x40, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+ { E1000_TDLEN(4), 0x40, 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF },
+ { E1000_TDT(0), 0x100, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+ { E1000_TDT(4), 0x40, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+ { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 },
+ { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB0FE, 0x003FFFFB },
+ { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB0FE, 0xFFFFFFFF },
+ { E1000_TCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 },
+ { E1000_RA, 0, 16, TABLE64_TEST_LO,
+ 0xFFFFFFFF, 0xFFFFFFFF },
+ { E1000_RA, 0, 16, TABLE64_TEST_HI,
+ 0xC3FFFFFF, 0xFFFFFFFF },
+ { E1000_RA2, 0, 16, TABLE64_TEST_LO,
+ 0xFFFFFFFF, 0xFFFFFFFF },
+ { E1000_RA2, 0, 16, TABLE64_TEST_HI,
+ 0xC3FFFFFF, 0xFFFFFFFF },
+ { E1000_MTA, 0, 128, TABLE32_TEST,
+ 0xFFFFFFFF, 0xFFFFFFFF },
+ { 0, 0, 0, 0 }
+};
+
+/* 82580 reg test */
+static struct igb_reg_test reg_test_82580[] = {
+ { E1000_FCAL, 0x100, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+ { E1000_FCAH, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF },
+ { E1000_FCT, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF },
+ { E1000_VET, 0x100, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+ { E1000_RDBAL(0), 0x100, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+ { E1000_RDBAH(0), 0x100, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+ { E1000_RDLEN(0), 0x100, 4, PATTERN_TEST, 0x000FFFF0, 0x000FFFFF },
+ { E1000_RDBAL(4), 0x40, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+ { E1000_RDBAH(4), 0x40, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+ { E1000_RDLEN(4), 0x40, 4, PATTERN_TEST, 0x000FFFF0, 0x000FFFFF },
+ /* RDH is read-only for 82580, only test RDT. */
+ { E1000_RDT(0), 0x100, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+ { E1000_RDT(4), 0x40, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+ { E1000_FCRTH, 0x100, 1, PATTERN_TEST, 0x0000FFF0, 0x0000FFF0 },
+ { E1000_FCTTV, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+ { E1000_TIPG, 0x100, 1, PATTERN_TEST, 0x3FFFFFFF, 0x3FFFFFFF },
+ { E1000_TDBAL(0), 0x100, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+ { E1000_TDBAH(0), 0x100, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+ { E1000_TDLEN(0), 0x100, 4, PATTERN_TEST, 0x000FFFF0, 0x000FFFFF },
+ { E1000_TDBAL(4), 0x40, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+ { E1000_TDBAH(4), 0x40, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+ { E1000_TDLEN(4), 0x40, 4, PATTERN_TEST, 0x000FFFF0, 0x000FFFFF },
+ { E1000_TDT(0), 0x100, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+ { E1000_TDT(4), 0x40, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+ { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 },
+ { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB0FE, 0x003FFFFB },
+ { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB0FE, 0xFFFFFFFF },
+ { E1000_TCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 },
+ { E1000_RA, 0, 16, TABLE64_TEST_LO,
+ 0xFFFFFFFF, 0xFFFFFFFF },
+ { E1000_RA, 0, 16, TABLE64_TEST_HI,
+ 0x83FFFFFF, 0xFFFFFFFF },
+ { E1000_RA2, 0, 8, TABLE64_TEST_LO,
+ 0xFFFFFFFF, 0xFFFFFFFF },
+ { E1000_RA2, 0, 8, TABLE64_TEST_HI,
+ 0x83FFFFFF, 0xFFFFFFFF },
+ { E1000_MTA, 0, 128, TABLE32_TEST,
+ 0xFFFFFFFF, 0xFFFFFFFF },
+ { 0, 0, 0, 0 }
+};
+
+/* 82576 reg test */
+static struct igb_reg_test reg_test_82576[] = {
+ { E1000_FCAL, 0x100, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+ { E1000_FCAH, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF },
+ { E1000_FCT, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF },
+ { E1000_VET, 0x100, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+ { E1000_RDBAL(0), 0x100, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+ { E1000_RDBAH(0), 0x100, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+ { E1000_RDLEN(0), 0x100, 4, PATTERN_TEST, 0x000FFFF0, 0x000FFFFF },
+ { E1000_RDBAL(4), 0x40, 12, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+ { E1000_RDBAH(4), 0x40, 12, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+ { E1000_RDLEN(4), 0x40, 12, PATTERN_TEST, 0x000FFFF0, 0x000FFFFF },
+ /* Enable all queues before testing. */
+ { E1000_RXDCTL(0), 0x100, 4, WRITE_NO_TEST, 0, E1000_RXDCTL_QUEUE_ENABLE },
+ { E1000_RXDCTL(4), 0x40, 12, WRITE_NO_TEST, 0, E1000_RXDCTL_QUEUE_ENABLE },
+ /* RDH is read-only for 82576, only test RDT. */
+ { E1000_RDT(0), 0x100, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+ { E1000_RDT(4), 0x40, 12, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+ { E1000_RXDCTL(0), 0x100, 4, WRITE_NO_TEST, 0, 0 },
+ { E1000_RXDCTL(4), 0x40, 12, WRITE_NO_TEST, 0, 0 },
+ { E1000_FCRTH, 0x100, 1, PATTERN_TEST, 0x0000FFF0, 0x0000FFF0 },
+ { E1000_FCTTV, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+ { E1000_TIPG, 0x100, 1, PATTERN_TEST, 0x3FFFFFFF, 0x3FFFFFFF },
+ { E1000_TDBAL(0), 0x100, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+ { E1000_TDBAH(0), 0x100, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+ { E1000_TDLEN(0), 0x100, 4, PATTERN_TEST, 0x000FFFF0, 0x000FFFFF },
+ { E1000_TDBAL(4), 0x40, 12, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+ { E1000_TDBAH(4), 0x40, 12, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+ { E1000_TDLEN(4), 0x40, 12, PATTERN_TEST, 0x000FFFF0, 0x000FFFFF },
+ { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 },
+ { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB0FE, 0x003FFFFB },
+ { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB0FE, 0xFFFFFFFF },
+ { E1000_TCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 },
+ { E1000_RA, 0, 16, TABLE64_TEST_LO,
+ 0xFFFFFFFF, 0xFFFFFFFF },
+ { E1000_RA, 0, 16, TABLE64_TEST_HI,
+ 0x83FFFFFF, 0xFFFFFFFF },
+ { E1000_RA2, 0, 8, TABLE64_TEST_LO,
+ 0xFFFFFFFF, 0xFFFFFFFF },
+ { E1000_RA2, 0, 8, TABLE64_TEST_HI,
+ 0x83FFFFFF, 0xFFFFFFFF },
+ { E1000_MTA, 0, 128, TABLE32_TEST,
+ 0xFFFFFFFF, 0xFFFFFFFF },
+ { 0, 0, 0, 0 }
+};
+
+/* 82575 register test */
+static struct igb_reg_test reg_test_82575[] = {
+ { E1000_FCAL, 0x100, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+ { E1000_FCAH, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF },
+ { E1000_FCT, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF },
+ { E1000_VET, 0x100, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+ { E1000_RDBAL(0), 0x100, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+ { E1000_RDBAH(0), 0x100, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+ { E1000_RDLEN(0), 0x100, 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF },
+ /* Enable all four RX queues before testing. */
+ { E1000_RXDCTL(0), 0x100, 4, WRITE_NO_TEST, 0, E1000_RXDCTL_QUEUE_ENABLE },
+ /* RDH is read-only for 82575, only test RDT. */
+ { E1000_RDT(0), 0x100, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+ { E1000_RXDCTL(0), 0x100, 4, WRITE_NO_TEST, 0, 0 },
+ { E1000_FCRTH, 0x100, 1, PATTERN_TEST, 0x0000FFF0, 0x0000FFF0 },
+ { E1000_FCTTV, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+ { E1000_TIPG, 0x100, 1, PATTERN_TEST, 0x3FFFFFFF, 0x3FFFFFFF },
+ { E1000_TDBAL(0), 0x100, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+ { E1000_TDBAH(0), 0x100, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+ { E1000_TDLEN(0), 0x100, 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF },
+ { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 },
+ { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB3FE, 0x003FFFFB },
+ { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB3FE, 0xFFFFFFFF },
+ { E1000_TCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 },
+ { E1000_TXCW, 0x100, 1, PATTERN_TEST, 0xC000FFFF, 0x0000FFFF },
+ { E1000_RA, 0, 16, TABLE64_TEST_LO,
+ 0xFFFFFFFF, 0xFFFFFFFF },
+ { E1000_RA, 0, 16, TABLE64_TEST_HI,
+ 0x800FFFFF, 0xFFFFFFFF },
+ { E1000_MTA, 0, 128, TABLE32_TEST,
+ 0xFFFFFFFF, 0xFFFFFFFF },
+ { 0, 0, 0, 0 }
+};
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.c
new file mode 100644
index 00000000..015c8952
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.c
@@ -0,0 +1,436 @@
+/*******************************************************************************
+
+ Intel(R) Gigabit Ethernet Linux driver
+ Copyright(c) 2007-2013 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+
+#include <linux/tcp.h>
+
+#include "igb.h"
+#include "igb_vmdq.h"
+#include <linux/if_vlan.h>
+
+#ifdef CONFIG_IGB_VMDQ_NETDEV
+int igb_vmdq_open(struct net_device *dev)
+{
+ struct igb_vmdq_adapter *vadapter = netdev_priv(dev);
+ struct igb_adapter *adapter = vadapter->real_adapter;
+ struct net_device *main_netdev = adapter->netdev;
+ int hw_queue = vadapter->rx_ring->queue_index +
+ adapter->vfs_allocated_count;
+
+ if (test_bit(__IGB_DOWN, &adapter->state)) {
+ DPRINTK(DRV, WARNING,
+ "Open %s before opening this device.\n",
+ main_netdev->name);
+ return -EAGAIN;
+ }
+ netif_carrier_off(dev);
+ vadapter->tx_ring->vmdq_netdev = dev;
+ vadapter->rx_ring->vmdq_netdev = dev;
+ if (is_valid_ether_addr(dev->dev_addr)) {
+ igb_del_mac_filter(adapter, dev->dev_addr, hw_queue);
+ igb_add_mac_filter(adapter, dev->dev_addr, hw_queue);
+ }
+ netif_carrier_on(dev);
+ return 0;
+}
+
+int igb_vmdq_close(struct net_device *dev)
+{
+ struct igb_vmdq_adapter *vadapter = netdev_priv(dev);
+ struct igb_adapter *adapter = vadapter->real_adapter;
+ int hw_queue = vadapter->rx_ring->queue_index +
+ adapter->vfs_allocated_count;
+
+ netif_carrier_off(dev);
+ igb_del_mac_filter(adapter, dev->dev_addr, hw_queue);
+
+ vadapter->tx_ring->vmdq_netdev = NULL;
+ vadapter->rx_ring->vmdq_netdev = NULL;
+ return 0;
+}
+
+netdev_tx_t igb_vmdq_xmit_frame(struct sk_buff *skb, struct net_device *dev)
+{
+ struct igb_vmdq_adapter *vadapter = netdev_priv(dev);
+
+ return igb_xmit_frame_ring(skb, vadapter->tx_ring);
+}
+
+struct net_device_stats *igb_vmdq_get_stats(struct net_device *dev)
+{
+ struct igb_vmdq_adapter *vadapter = netdev_priv(dev);
+ struct igb_adapter *adapter = vadapter->real_adapter;
+ struct e1000_hw *hw = &adapter->hw;
+ int hw_queue = vadapter->rx_ring->queue_index +
+ adapter->vfs_allocated_count;
+
+ vadapter->net_stats.rx_packets +=
+ E1000_READ_REG(hw, E1000_PFVFGPRC(hw_queue));
+ E1000_WRITE_REG(hw, E1000_PFVFGPRC(hw_queue), 0);
+ vadapter->net_stats.tx_packets +=
+ E1000_READ_REG(hw, E1000_PFVFGPTC(hw_queue));
+ E1000_WRITE_REG(hw, E1000_PFVFGPTC(hw_queue), 0);
+ vadapter->net_stats.rx_bytes +=
+ E1000_READ_REG(hw, E1000_PFVFGORC(hw_queue));
+ E1000_WRITE_REG(hw, E1000_PFVFGORC(hw_queue), 0);
+ vadapter->net_stats.tx_bytes +=
+ E1000_READ_REG(hw, E1000_PFVFGOTC(hw_queue));
+ E1000_WRITE_REG(hw, E1000_PFVFGOTC(hw_queue), 0);
+ vadapter->net_stats.multicast +=
+ E1000_READ_REG(hw, E1000_PFVFMPRC(hw_queue));
+ E1000_WRITE_REG(hw, E1000_PFVFMPRC(hw_queue), 0);
+ /* only return the current stats */
+ return &vadapter->net_stats;
+}
+
+/**
+ * igb_write_vm_addr_list - write unicast addresses to RAR table
+ * @netdev: network interface device structure
+ *
+ * Writes unicast address list to the RAR table.
+ * Returns: -ENOMEM on failure/insufficient address space
+ * 0 on no addresses written
+ * X on writing X addresses to the RAR table
+ **/
+static int igb_write_vm_addr_list(struct net_device *netdev)
+{
+ struct igb_vmdq_adapter *vadapter = netdev_priv(netdev);
+ struct igb_adapter *adapter = vadapter->real_adapter;
+ int count = 0;
+ int hw_queue = vadapter->rx_ring->queue_index +
+ adapter->vfs_allocated_count;
+
+ /* return ENOMEM indicating insufficient memory for addresses */
+ if (netdev_uc_count(netdev) > igb_available_rars(adapter))
+ return -ENOMEM;
+
+ if (!netdev_uc_empty(netdev)) {
+#ifdef NETDEV_HW_ADDR_T_UNICAST
+ struct netdev_hw_addr *ha;
+#else
+ struct dev_mc_list *ha;
+#endif
+ netdev_for_each_uc_addr(ha, netdev) {
+#ifdef NETDEV_HW_ADDR_T_UNICAST
+ igb_del_mac_filter(adapter, ha->addr, hw_queue);
+ igb_add_mac_filter(adapter, ha->addr, hw_queue);
+#else
+ igb_del_mac_filter(adapter, ha->da_addr, hw_queue);
+ igb_add_mac_filter(adapter, ha->da_addr, hw_queue);
+#endif
+ count++;
+ }
+ }
+ return count;
+}
+
+
+#define E1000_VMOLR_UPE 0x20000000 /* Unicast promiscuous mode */
+void igb_vmdq_set_rx_mode(struct net_device *dev)
+{
+ struct igb_vmdq_adapter *vadapter = netdev_priv(dev);
+ struct igb_adapter *adapter = vadapter->real_adapter;
+ struct e1000_hw *hw = &adapter->hw;
+ u32 vmolr, rctl;
+ int hw_queue = vadapter->rx_ring->queue_index +
+ adapter->vfs_allocated_count;
+
+ /* Check for Promiscuous and All Multicast modes */
+ vmolr = E1000_READ_REG(hw, E1000_VMOLR(hw_queue));
+
+ /* clear the affected bits */
+ vmolr &= ~(E1000_VMOLR_UPE | E1000_VMOLR_MPME |
+ E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE);
+
+ if (dev->flags & IFF_PROMISC) {
+ vmolr |= E1000_VMOLR_UPE;
+ rctl = E1000_READ_REG(hw, E1000_RCTL);
+ rctl |= E1000_RCTL_UPE;
+ E1000_WRITE_REG(hw, E1000_RCTL, rctl);
+ } else {
+ rctl = E1000_READ_REG(hw, E1000_RCTL);
+ rctl &= ~E1000_RCTL_UPE;
+ E1000_WRITE_REG(hw, E1000_RCTL, rctl);
+ if (dev->flags & IFF_ALLMULTI) {
+ vmolr |= E1000_VMOLR_MPME;
+ } else {
+ /*
+ * Write addresses to the MTA, if the attempt fails
+ * then we should just turn on promiscuous mode so
+ * that we can at least receive multicast traffic
+ */
+ if (igb_write_mc_addr_list(adapter->netdev) != 0)
+ vmolr |= E1000_VMOLR_ROMPE;
+ }
+#ifdef HAVE_SET_RX_MODE
+ /*
+ * Write addresses to available RAR registers, if there is not
+ * sufficient space to store all the addresses then enable
+ * unicast promiscuous mode
+ */
+ if (igb_write_vm_addr_list(dev) < 0)
+ vmolr |= E1000_VMOLR_UPE;
+#endif
+ }
+ E1000_WRITE_REG(hw, E1000_VMOLR(hw_queue), vmolr);
+
+ return;
+}
+
+int igb_vmdq_set_mac(struct net_device *dev, void *p)
+{
+ struct sockaddr *addr = p;
+ struct igb_vmdq_adapter *vadapter = netdev_priv(dev);
+ struct igb_adapter *adapter = vadapter->real_adapter;
+ int hw_queue = vadapter->rx_ring->queue_index +
+ adapter->vfs_allocated_count;
+
+ igb_del_mac_filter(adapter, dev->dev_addr, hw_queue);
+ memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
+ return igb_add_mac_filter(adapter, dev->dev_addr, hw_queue);
+}
+
+int igb_vmdq_change_mtu(struct net_device *dev, int new_mtu)
+{
+ struct igb_vmdq_adapter *vadapter = netdev_priv(dev);
+ struct igb_adapter *adapter = vadapter->real_adapter;
+
+ if (adapter->netdev->mtu < new_mtu) {
+ DPRINTK(PROBE, INFO,
+ "Set MTU on %s to >= %d "
+ "before changing MTU on %s\n",
+ adapter->netdev->name, new_mtu, dev->name);
+ return -EINVAL;
+ }
+ dev->mtu = new_mtu;
+ return 0;
+}
+
+void igb_vmdq_tx_timeout(struct net_device *dev)
+{
+ return;
+}
+
+void igb_vmdq_vlan_rx_register(struct net_device *dev, struct vlan_group *grp)
+{
+ struct igb_vmdq_adapter *vadapter = netdev_priv(dev);
+ struct igb_adapter *adapter = vadapter->real_adapter;
+ struct e1000_hw *hw = &adapter->hw;
+ int hw_queue = vadapter->rx_ring->queue_index +
+ adapter->vfs_allocated_count;
+
+ vadapter->vlgrp = grp;
+
+ igb_enable_vlan_tags(adapter);
+ E1000_WRITE_REG(hw, E1000_VMVIR(hw_queue), 0);
+
+ return;
+}
+void igb_vmdq_vlan_rx_add_vid(struct net_device *dev, unsigned short vid)
+{
+ struct igb_vmdq_adapter *vadapter = netdev_priv(dev);
+ struct igb_adapter *adapter = vadapter->real_adapter;
+#ifndef HAVE_NETDEV_VLAN_FEATURES
+ struct net_device *v_netdev;
+#endif
+ int hw_queue = vadapter->rx_ring->queue_index +
+ adapter->vfs_allocated_count;
+
+ /* attempt to add filter to vlvf array */
+ igb_vlvf_set(adapter, vid, TRUE, hw_queue);
+
+#ifndef HAVE_NETDEV_VLAN_FEATURES
+
+ /* Copy feature flags from netdev to the vlan netdev for this vid.
+ * This allows things like TSO to bubble down to our vlan device.
+ */
+ v_netdev = vlan_group_get_device(vadapter->vlgrp, vid);
+ v_netdev->features |= adapter->netdev->features;
+ vlan_group_set_device(vadapter->vlgrp, vid, v_netdev);
+#endif
+
+ return;
+}
+void igb_vmdq_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
+{
+ struct igb_vmdq_adapter *vadapter = netdev_priv(dev);
+ struct igb_adapter *adapter = vadapter->real_adapter;
+ int hw_queue = vadapter->rx_ring->queue_index +
+ adapter->vfs_allocated_count;
+
+ vlan_group_set_device(vadapter->vlgrp, vid, NULL);
+ /* remove vlan from VLVF table array */
+ igb_vlvf_set(adapter, vid, FALSE, hw_queue);
+
+
+ return;
+}
+
+static int igb_vmdq_get_settings(struct net_device *netdev,
+ struct ethtool_cmd *ecmd)
+{
+ struct igb_vmdq_adapter *vadapter = netdev_priv(netdev);
+ struct igb_adapter *adapter = vadapter->real_adapter;
+ struct e1000_hw *hw = &adapter->hw;
+ u32 status;
+
+ if (hw->phy.media_type == e1000_media_type_copper) {
+
+ ecmd->supported = (SUPPORTED_10baseT_Half |
+ SUPPORTED_10baseT_Full |
+ SUPPORTED_100baseT_Half |
+ SUPPORTED_100baseT_Full |
+ SUPPORTED_1000baseT_Full|
+ SUPPORTED_Autoneg |
+ SUPPORTED_TP);
+ ecmd->advertising = ADVERTISED_TP;
+
+ if (hw->mac.autoneg == 1) {
+ ecmd->advertising |= ADVERTISED_Autoneg;
+ /* the e1000 autoneg seems to match ethtool nicely */
+ ecmd->advertising |= hw->phy.autoneg_advertised;
+ }
+
+ ecmd->port = PORT_TP;
+ ecmd->phy_address = hw->phy.addr;
+ } else {
+ ecmd->supported = (SUPPORTED_1000baseT_Full |
+ SUPPORTED_FIBRE |
+ SUPPORTED_Autoneg);
+
+ ecmd->advertising = (ADVERTISED_1000baseT_Full |
+ ADVERTISED_FIBRE |
+ ADVERTISED_Autoneg);
+
+ ecmd->port = PORT_FIBRE;
+ }
+
+ ecmd->transceiver = XCVR_INTERNAL;
+
+ status = E1000_READ_REG(hw, E1000_STATUS);
+
+ if (status & E1000_STATUS_LU) {
+
+ if ((status & E1000_STATUS_SPEED_1000) ||
+ hw->phy.media_type != e1000_media_type_copper)
+ ecmd->speed = SPEED_1000;
+ else if (status & E1000_STATUS_SPEED_100)
+ ecmd->speed = SPEED_100;
+ else
+ ecmd->speed = SPEED_10;
+
+ if ((status & E1000_STATUS_FD) ||
+ hw->phy.media_type != e1000_media_type_copper)
+ ecmd->duplex = DUPLEX_FULL;
+ else
+ ecmd->duplex = DUPLEX_HALF;
+ } else {
+ ecmd->speed = -1;
+ ecmd->duplex = -1;
+ }
+
+ ecmd->autoneg = hw->mac.autoneg ? AUTONEG_ENABLE : AUTONEG_DISABLE;
+ return 0;
+}
+
+
+static u32 igb_vmdq_get_msglevel(struct net_device *netdev)
+{
+ struct igb_vmdq_adapter *vadapter = netdev_priv(netdev);
+ struct igb_adapter *adapter = vadapter->real_adapter;
+ return adapter->msg_enable;
+}
+
+static void igb_vmdq_get_drvinfo(struct net_device *netdev,
+ struct ethtool_drvinfo *drvinfo)
+{
+ struct igb_vmdq_adapter *vadapter = netdev_priv(netdev);
+ struct igb_adapter *adapter = vadapter->real_adapter;
+ struct net_device *main_netdev = adapter->netdev;
+
+ strncpy(drvinfo->driver, igb_driver_name, 32);
+ strncpy(drvinfo->version, igb_driver_version, 32);
+
+ strncpy(drvinfo->fw_version, "N/A", 4);
+ snprintf(drvinfo->bus_info, 32, "%s VMDQ %d", main_netdev->name,
+ vadapter->rx_ring->queue_index);
+ drvinfo->n_stats = 0;
+ drvinfo->testinfo_len = 0;
+ drvinfo->regdump_len = 0;
+}
+
+static void igb_vmdq_get_ringparam(struct net_device *netdev,
+ struct ethtool_ringparam *ring)
+{
+ struct igb_vmdq_adapter *vadapter = netdev_priv(netdev);
+
+ struct igb_ring *tx_ring = vadapter->tx_ring;
+ struct igb_ring *rx_ring = vadapter->rx_ring;
+
+ ring->rx_max_pending = IGB_MAX_RXD;
+ ring->tx_max_pending = IGB_MAX_TXD;
+ ring->rx_mini_max_pending = 0;
+ ring->rx_jumbo_max_pending = 0;
+ ring->rx_pending = rx_ring->count;
+ ring->tx_pending = tx_ring->count;
+ ring->rx_mini_pending = 0;
+ ring->rx_jumbo_pending = 0;
+}
+static u32 igb_vmdq_get_rx_csum(struct net_device *netdev)
+{
+ struct igb_vmdq_adapter *vadapter = netdev_priv(netdev);
+ struct igb_adapter *adapter = vadapter->real_adapter;
+
+ return test_bit(IGB_RING_FLAG_RX_CSUM, &adapter->rx_ring[0]->flags);
+}
+
+
+static struct ethtool_ops igb_vmdq_ethtool_ops = {
+ .get_settings = igb_vmdq_get_settings,
+ .get_drvinfo = igb_vmdq_get_drvinfo,
+ .get_link = ethtool_op_get_link,
+ .get_ringparam = igb_vmdq_get_ringparam,
+ .get_rx_csum = igb_vmdq_get_rx_csum,
+ .get_tx_csum = ethtool_op_get_tx_csum,
+ .get_sg = ethtool_op_get_sg,
+ .set_sg = ethtool_op_set_sg,
+ .get_msglevel = igb_vmdq_get_msglevel,
+#ifdef NETIF_F_TSO
+ .get_tso = ethtool_op_get_tso,
+#endif
+#ifdef HAVE_ETHTOOL_GET_PERM_ADDR
+ .get_perm_addr = ethtool_op_get_perm_addr,
+#endif
+};
+
+void igb_vmdq_set_ethtool_ops(struct net_device *netdev)
+{
+ SET_ETHTOOL_OPS(netdev, &igb_vmdq_ethtool_ops);
+}
+
+
+#endif /* CONFIG_IGB_VMDQ_NETDEV */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.h
new file mode 100644
index 00000000..e51e7c4e
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.h
@@ -0,0 +1,46 @@
+/*******************************************************************************
+
+ Intel(R) Gigabit Ethernet Linux driver
+ Copyright(c) 2007-2013 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _IGB_VMDQ_H_
+#define _IGB_VMDQ_H_
+
+#ifdef CONFIG_IGB_VMDQ_NETDEV
+int igb_vmdq_open(struct net_device *dev);
+int igb_vmdq_close(struct net_device *dev);
+netdev_tx_t igb_vmdq_xmit_frame(struct sk_buff *skb, struct net_device *dev);
+struct net_device_stats *igb_vmdq_get_stats(struct net_device *dev);
+void igb_vmdq_set_rx_mode(struct net_device *dev);
+int igb_vmdq_set_mac(struct net_device *dev, void *addr);
+int igb_vmdq_change_mtu(struct net_device *dev, int new_mtu);
+void igb_vmdq_tx_timeout(struct net_device *dev);
+void igb_vmdq_vlan_rx_register(struct net_device *dev,
+ struct vlan_group *grp);
+void igb_vmdq_vlan_rx_add_vid(struct net_device *dev, unsigned short vid);
+void igb_vmdq_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid);
+void igb_vmdq_set_ethtool_ops(struct net_device *netdev);
+#endif /* CONFIG_IGB_VMDQ_NETDEV */
+#endif /* _IGB_VMDQ_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.c
new file mode 100644
index 00000000..bde3a83c
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.c
@@ -0,0 +1,1482 @@
+/*******************************************************************************
+
+ Intel(R) Gigabit Ethernet Linux driver
+ Copyright(c) 2007-2013 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#include "igb.h"
+#include "kcompat.h"
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,8) )
+/* From lib/vsprintf.c */
+#include <asm/div64.h>
+
+static int skip_atoi(const char **s)
+{
+ int i=0;
+
+ while (isdigit(**s))
+ i = i*10 + *((*s)++) - '0';
+ return i;
+}
+
+#define _kc_ZEROPAD 1 /* pad with zero */
+#define _kc_SIGN 2 /* unsigned/signed long */
+#define _kc_PLUS 4 /* show plus */
+#define _kc_SPACE 8 /* space if plus */
+#define _kc_LEFT 16 /* left justified */
+#define _kc_SPECIAL 32 /* 0x */
+#define _kc_LARGE 64 /* use 'ABCDEF' instead of 'abcdef' */
+
+static char * number(char * buf, char * end, long long num, int base, int size, int precision, int type)
+{
+ char c,sign,tmp[66];
+ const char *digits;
+ const char small_digits[] = "0123456789abcdefghijklmnopqrstuvwxyz";
+ const char large_digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+ int i;
+
+ digits = (type & _kc_LARGE) ? large_digits : small_digits;
+ if (type & _kc_LEFT)
+ type &= ~_kc_ZEROPAD;
+ if (base < 2 || base > 36)
+ return 0;
+ c = (type & _kc_ZEROPAD) ? '0' : ' ';
+ sign = 0;
+ if (type & _kc_SIGN) {
+ if (num < 0) {
+ sign = '-';
+ num = -num;
+ size--;
+ } else if (type & _kc_PLUS) {
+ sign = '+';
+ size--;
+ } else if (type & _kc_SPACE) {
+ sign = ' ';
+ size--;
+ }
+ }
+ if (type & _kc_SPECIAL) {
+ if (base == 16)
+ size -= 2;
+ else if (base == 8)
+ size--;
+ }
+ i = 0;
+ if (num == 0)
+ tmp[i++]='0';
+ else while (num != 0)
+ tmp[i++] = digits[do_div(num,base)];
+ if (i > precision)
+ precision = i;
+ size -= precision;
+ if (!(type&(_kc_ZEROPAD+_kc_LEFT))) {
+ while(size-->0) {
+ if (buf <= end)
+ *buf = ' ';
+ ++buf;
+ }
+ }
+ if (sign) {
+ if (buf <= end)
+ *buf = sign;
+ ++buf;
+ }
+ if (type & _kc_SPECIAL) {
+ if (base==8) {
+ if (buf <= end)
+ *buf = '0';
+ ++buf;
+ } else if (base==16) {
+ if (buf <= end)
+ *buf = '0';
+ ++buf;
+ if (buf <= end)
+ *buf = digits[33];
+ ++buf;
+ }
+ }
+ if (!(type & _kc_LEFT)) {
+ while (size-- > 0) {
+ if (buf <= end)
+ *buf = c;
+ ++buf;
+ }
+ }
+ while (i < precision--) {
+ if (buf <= end)
+ *buf = '0';
+ ++buf;
+ }
+ while (i-- > 0) {
+ if (buf <= end)
+ *buf = tmp[i];
+ ++buf;
+ }
+ while (size-- > 0) {
+ if (buf <= end)
+ *buf = ' ';
+ ++buf;
+ }
+ return buf;
+}
+
+int _kc_vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
+{
+ int len;
+ unsigned long long num;
+ int i, base;
+ char *str, *end, c;
+ const char *s;
+
+ int flags; /* flags to number() */
+
+ int field_width; /* width of output field */
+ int precision; /* min. # of digits for integers; max
+ number of chars for from string */
+ int qualifier; /* 'h', 'l', or 'L' for integer fields */
+ /* 'z' support added 23/7/1999 S.H. */
+ /* 'z' changed to 'Z' --davidm 1/25/99 */
+
+ str = buf;
+ end = buf + size - 1;
+
+ if (end < buf - 1) {
+ end = ((void *) -1);
+ size = end - buf + 1;
+ }
+
+ for (; *fmt ; ++fmt) {
+ if (*fmt != '%') {
+ if (str <= end)
+ *str = *fmt;
+ ++str;
+ continue;
+ }
+
+ /* process flags */
+ flags = 0;
+ repeat:
+ ++fmt; /* this also skips first '%' */
+ switch (*fmt) {
+ case '-': flags |= _kc_LEFT; goto repeat;
+ case '+': flags |= _kc_PLUS; goto repeat;
+ case ' ': flags |= _kc_SPACE; goto repeat;
+ case '#': flags |= _kc_SPECIAL; goto repeat;
+ case '0': flags |= _kc_ZEROPAD; goto repeat;
+ }
+
+ /* get field width */
+ field_width = -1;
+ if (isdigit(*fmt))
+ field_width = skip_atoi(&fmt);
+ else if (*fmt == '*') {
+ ++fmt;
+ /* it's the next argument */
+ field_width = va_arg(args, int);
+ if (field_width < 0) {
+ field_width = -field_width;
+ flags |= _kc_LEFT;
+ }
+ }
+
+ /* get the precision */
+ precision = -1;
+ if (*fmt == '.') {
+ ++fmt;
+ if (isdigit(*fmt))
+ precision = skip_atoi(&fmt);
+ else if (*fmt == '*') {
+ ++fmt;
+ /* it's the next argument */
+ precision = va_arg(args, int);
+ }
+ if (precision < 0)
+ precision = 0;
+ }
+
+ /* get the conversion qualifier */
+ qualifier = -1;
+ if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || *fmt =='Z') {
+ qualifier = *fmt;
+ ++fmt;
+ }
+
+ /* default base */
+ base = 10;
+
+ switch (*fmt) {
+ case 'c':
+ if (!(flags & _kc_LEFT)) {
+ while (--field_width > 0) {
+ if (str <= end)
+ *str = ' ';
+ ++str;
+ }
+ }
+ c = (unsigned char) va_arg(args, int);
+ if (str <= end)
+ *str = c;
+ ++str;
+ while (--field_width > 0) {
+ if (str <= end)
+ *str = ' ';
+ ++str;
+ }
+ continue;
+
+ case 's':
+ s = va_arg(args, char *);
+ if (!s)
+ s = "<NULL>";
+
+ len = strnlen(s, precision);
+
+ if (!(flags & _kc_LEFT)) {
+ while (len < field_width--) {
+ if (str <= end)
+ *str = ' ';
+ ++str;
+ }
+ }
+ for (i = 0; i < len; ++i) {
+ if (str <= end)
+ *str = *s;
+ ++str; ++s;
+ }
+ while (len < field_width--) {
+ if (str <= end)
+ *str = ' ';
+ ++str;
+ }
+ continue;
+
+ case 'p':
+ if (field_width == -1) {
+ field_width = 2*sizeof(void *);
+ flags |= _kc_ZEROPAD;
+ }
+ str = number(str, end,
+ (unsigned long) va_arg(args, void *),
+ 16, field_width, precision, flags);
+ continue;
+
+
+ case 'n':
+ /* FIXME:
+ * What does C99 say about the overflow case here? */
+ if (qualifier == 'l') {
+ long * ip = va_arg(args, long *);
+ *ip = (str - buf);
+ } else if (qualifier == 'Z') {
+ size_t * ip = va_arg(args, size_t *);
+ *ip = (str - buf);
+ } else {
+ int * ip = va_arg(args, int *);
+ *ip = (str - buf);
+ }
+ continue;
+
+ case '%':
+ if (str <= end)
+ *str = '%';
+ ++str;
+ continue;
+
+ /* integer number formats - set up the flags and "break" */
+ case 'o':
+ base = 8;
+ break;
+
+ case 'X':
+ flags |= _kc_LARGE;
+ case 'x':
+ base = 16;
+ break;
+
+ case 'd':
+ case 'i':
+ flags |= _kc_SIGN;
+ case 'u':
+ break;
+
+ default:
+ if (str <= end)
+ *str = '%';
+ ++str;
+ if (*fmt) {
+ if (str <= end)
+ *str = *fmt;
+ ++str;
+ } else {
+ --fmt;
+ }
+ continue;
+ }
+ if (qualifier == 'L')
+ num = va_arg(args, long long);
+ else if (qualifier == 'l') {
+ num = va_arg(args, unsigned long);
+ if (flags & _kc_SIGN)
+ num = (signed long) num;
+ } else if (qualifier == 'Z') {
+ num = va_arg(args, size_t);
+ } else if (qualifier == 'h') {
+ num = (unsigned short) va_arg(args, int);
+ if (flags & _kc_SIGN)
+ num = (signed short) num;
+ } else {
+ num = va_arg(args, unsigned int);
+ if (flags & _kc_SIGN)
+ num = (signed int) num;
+ }
+ str = number(str, end, num, base,
+ field_width, precision, flags);
+ }
+ if (str <= end)
+ *str = '\0';
+ else if (size > 0)
+ /* don't write out a null byte if the buf size is zero */
+ *end = '\0';
+ /* the trailing null byte doesn't count towards the total
+ * ++str;
+ */
+ return str-buf;
+}
+
+int _kc_snprintf(char * buf, size_t size, const char *fmt, ...)
+{
+ va_list args;
+ int i;
+
+ va_start(args, fmt);
+ i = _kc_vsnprintf(buf,size,fmt,args);
+ va_end(args);
+ return i;
+}
+#endif /* < 2.4.8 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,13) )
+
+/**************************************/
+/* PCI DMA MAPPING */
+
+#if defined(CONFIG_HIGHMEM)
+
+#ifndef PCI_DRAM_OFFSET
+#define PCI_DRAM_OFFSET 0
+#endif
+
+u64
+_kc_pci_map_page(struct pci_dev *dev, struct page *page, unsigned long offset,
+ size_t size, int direction)
+{
+ return (((u64) (page - mem_map) << PAGE_SHIFT) + offset +
+ PCI_DRAM_OFFSET);
+}
+
+#else /* CONFIG_HIGHMEM */
+
+u64
+_kc_pci_map_page(struct pci_dev *dev, struct page *page, unsigned long offset,
+ size_t size, int direction)
+{
+ return pci_map_single(dev, (void *)page_address(page) + offset, size,
+ direction);
+}
+
+#endif /* CONFIG_HIGHMEM */
+
+void
+_kc_pci_unmap_page(struct pci_dev *dev, u64 dma_addr, size_t size,
+ int direction)
+{
+ return pci_unmap_single(dev, dma_addr, size, direction);
+}
+
+#endif /* 2.4.13 => 2.4.3 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,3) )
+
+/**************************************/
+/* PCI DRIVER API */
+
+int
+_kc_pci_set_dma_mask(struct pci_dev *dev, dma_addr_t mask)
+{
+ if (!pci_dma_supported(dev, mask))
+ return -EIO;
+ dev->dma_mask = mask;
+ return 0;
+}
+
+int
+_kc_pci_request_regions(struct pci_dev *dev, char *res_name)
+{
+ int i;
+
+ for (i = 0; i < 6; i++) {
+ if (pci_resource_len(dev, i) == 0)
+ continue;
+
+ if (pci_resource_flags(dev, i) & IORESOURCE_IO) {
+ if (!request_region(pci_resource_start(dev, i), pci_resource_len(dev, i), res_name)) {
+ pci_release_regions(dev);
+ return -EBUSY;
+ }
+ } else if (pci_resource_flags(dev, i) & IORESOURCE_MEM) {
+ if (!request_mem_region(pci_resource_start(dev, i), pci_resource_len(dev, i), res_name)) {
+ pci_release_regions(dev);
+ return -EBUSY;
+ }
+ }
+ }
+ return 0;
+}
+
+void
+_kc_pci_release_regions(struct pci_dev *dev)
+{
+ int i;
+
+ for (i = 0; i < 6; i++) {
+ if (pci_resource_len(dev, i) == 0)
+ continue;
+
+ if (pci_resource_flags(dev, i) & IORESOURCE_IO)
+ release_region(pci_resource_start(dev, i), pci_resource_len(dev, i));
+
+ else if (pci_resource_flags(dev, i) & IORESOURCE_MEM)
+ release_mem_region(pci_resource_start(dev, i), pci_resource_len(dev, i));
+ }
+}
+
+/**************************************/
+/* NETWORK DRIVER API */
+
+struct net_device *
+_kc_alloc_etherdev(int sizeof_priv)
+{
+ struct net_device *dev;
+ int alloc_size;
+
+ alloc_size = sizeof(*dev) + sizeof_priv + IFNAMSIZ + 31;
+ dev = kzalloc(alloc_size, GFP_KERNEL);
+ if (!dev)
+ return NULL;
+
+ if (sizeof_priv)
+ dev->priv = (void *) (((unsigned long)(dev + 1) + 31) & ~31);
+ dev->name[0] = '\0';
+ ether_setup(dev);
+
+ return dev;
+}
+
+int
+_kc_is_valid_ether_addr(u8 *addr)
+{
+ const char zaddr[6] = { 0, };
+
+ return !(addr[0] & 1) && memcmp(addr, zaddr, 6);
+}
+
+#endif /* 2.4.3 => 2.4.0 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,6) )
+
+int
+_kc_pci_set_power_state(struct pci_dev *dev, int state)
+{
+ return 0;
+}
+
+int
+_kc_pci_enable_wake(struct pci_dev *pdev, u32 state, int enable)
+{
+ return 0;
+}
+
+#endif /* 2.4.6 => 2.4.3 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) )
+void _kc_skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page,
+ int off, int size)
+{
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+ frag->page = page;
+ frag->page_offset = off;
+ frag->size = size;
+ skb_shinfo(skb)->nr_frags = i + 1;
+}
+
+/*
+ * Original Copyright:
+ * find_next_bit.c: fallback find next bit implementation
+ *
+ * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+/**
+ * find_next_bit - find the next set bit in a memory region
+ * @addr: The address to base the search on
+ * @offset: The bitnumber to start searching at
+ * @size: The maximum size to search
+ */
+unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
+ unsigned long offset)
+{
+ const unsigned long *p = addr + BITOP_WORD(offset);
+ unsigned long result = offset & ~(BITS_PER_LONG-1);
+ unsigned long tmp;
+
+ if (offset >= size)
+ return size;
+ size -= result;
+ offset %= BITS_PER_LONG;
+ if (offset) {
+ tmp = *(p++);
+ tmp &= (~0UL << offset);
+ if (size < BITS_PER_LONG)
+ goto found_first;
+ if (tmp)
+ goto found_middle;
+ size -= BITS_PER_LONG;
+ result += BITS_PER_LONG;
+ }
+ while (size & ~(BITS_PER_LONG-1)) {
+ if ((tmp = *(p++)))
+ goto found_middle;
+ result += BITS_PER_LONG;
+ size -= BITS_PER_LONG;
+ }
+ if (!size)
+ return result;
+ tmp = *p;
+
+found_first:
+ tmp &= (~0UL >> (BITS_PER_LONG - size));
+ if (tmp == 0UL) /* Are any bits set? */
+ return result + size; /* Nope. */
+found_middle:
+ return result + ffs(tmp);
+}
+
+size_t _kc_strlcpy(char *dest, const char *src, size_t size)
+{
+ size_t ret = strlen(src);
+
+ if (size) {
+ size_t len = (ret >= size) ? size - 1 : ret;
+ memcpy(dest, src, len);
+ dest[len] = '\0';
+ }
+ return ret;
+}
+
+#ifndef do_div
+#if BITS_PER_LONG == 32
+uint32_t __attribute__((weak)) _kc__div64_32(uint64_t *n, uint32_t base)
+{
+ uint64_t rem = *n;
+ uint64_t b = base;
+ uint64_t res, d = 1;
+ uint32_t high = rem >> 32;
+
+ /* Reduce the thing a bit first */
+ res = 0;
+ if (high >= base) {
+ high /= base;
+ res = (uint64_t) high << 32;
+ rem -= (uint64_t) (high*base) << 32;
+ }
+
+ while ((int64_t)b > 0 && b < rem) {
+ b = b+b;
+ d = d+d;
+ }
+
+ do {
+ if (rem >= b) {
+ rem -= b;
+ res += d;
+ }
+ b >>= 1;
+ d >>= 1;
+ } while (d);
+
+ *n = res;
+ return rem;
+}
+#endif /* BITS_PER_LONG == 32 */
+#endif /* do_div */
+#endif /* 2.6.0 => 2.4.6 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) )
+int _kc_scnprintf(char * buf, size_t size, const char *fmt, ...)
+{
+ va_list args;
+ int i;
+
+ va_start(args, fmt);
+ i = vsnprintf(buf, size, fmt, args);
+ va_end(args);
+ return (i >= size) ? (size - 1) : i;
+}
+#endif /* < 2.6.4 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,10) )
+DECLARE_BITMAP(_kcompat_node_online_map, MAX_NUMNODES) = {1};
+#endif /* < 2.6.10 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,13) )
+char *_kc_kstrdup(const char *s, unsigned int gfp)
+{
+ size_t len;
+ char *buf;
+
+ if (!s)
+ return NULL;
+
+ len = strlen(s) + 1;
+ buf = kmalloc(len, gfp);
+ if (buf)
+ memcpy(buf, s, len);
+ return buf;
+}
+#endif /* < 2.6.13 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14) )
+void *_kc_kzalloc(size_t size, int flags)
+{
+ void *ret = kmalloc(size, flags);
+ if (ret)
+ memset(ret, 0, size);
+ return ret;
+}
+#endif /* <= 2.6.13 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) )
+int _kc_skb_pad(struct sk_buff *skb, int pad)
+{
+ int ntail;
+
+ /* If the skbuff is non linear tailroom is always zero.. */
+ if(!skb_cloned(skb) && skb_tailroom(skb) >= pad) {
+ memset(skb->data+skb->len, 0, pad);
+ return 0;
+ }
+
+ ntail = skb->data_len + pad - (skb->end - skb->tail);
+ if (likely(skb_cloned(skb) || ntail > 0)) {
+ if (pskb_expand_head(skb, 0, ntail, GFP_ATOMIC));
+ goto free_skb;
+ }
+
+#ifdef MAX_SKB_FRAGS
+ if (skb_is_nonlinear(skb) &&
+ !__pskb_pull_tail(skb, skb->data_len))
+ goto free_skb;
+
+#endif
+ memset(skb->data + skb->len, 0, pad);
+ return 0;
+
+free_skb:
+ kfree_skb(skb);
+ return -ENOMEM;
+}
+
+#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,4)))
+int _kc_pci_save_state(struct pci_dev *pdev)
+{
+ struct net_device *netdev = pci_get_drvdata(pdev);
+ struct adapter_struct *adapter = netdev_priv(netdev);
+ int size = PCI_CONFIG_SPACE_LEN, i;
+ u16 pcie_cap_offset, pcie_link_status;
+
+#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) )
+ /* no ->dev for 2.4 kernels */
+ WARN_ON(pdev->dev.driver_data == NULL);
+#endif
+ pcie_cap_offset = pci_find_capability(pdev, PCI_CAP_ID_EXP);
+ if (pcie_cap_offset) {
+ if (!pci_read_config_word(pdev,
+ pcie_cap_offset + PCIE_LINK_STATUS,
+ &pcie_link_status))
+ size = PCIE_CONFIG_SPACE_LEN;
+ }
+ pci_config_space_ich8lan();
+#ifdef HAVE_PCI_ERS
+ if (adapter->config_space == NULL)
+#else
+ WARN_ON(adapter->config_space != NULL);
+#endif
+ adapter->config_space = kmalloc(size, GFP_KERNEL);
+ if (!adapter->config_space) {
+ printk(KERN_ERR "Out of memory in pci_save_state\n");
+ return -ENOMEM;
+ }
+ for (i = 0; i < (size / 4); i++)
+ pci_read_config_dword(pdev, i * 4, &adapter->config_space[i]);
+ return 0;
+}
+
+void _kc_pci_restore_state(struct pci_dev *pdev)
+{
+ struct net_device *netdev = pci_get_drvdata(pdev);
+ struct adapter_struct *adapter = netdev_priv(netdev);
+ int size = PCI_CONFIG_SPACE_LEN, i;
+ u16 pcie_cap_offset;
+ u16 pcie_link_status;
+
+ if (adapter->config_space != NULL) {
+ pcie_cap_offset = pci_find_capability(pdev, PCI_CAP_ID_EXP);
+ if (pcie_cap_offset &&
+ !pci_read_config_word(pdev,
+ pcie_cap_offset + PCIE_LINK_STATUS,
+ &pcie_link_status))
+ size = PCIE_CONFIG_SPACE_LEN;
+
+ pci_config_space_ich8lan();
+ for (i = 0; i < (size / 4); i++)
+ pci_write_config_dword(pdev, i * 4, adapter->config_space[i]);
+#ifndef HAVE_PCI_ERS
+ kfree(adapter->config_space);
+ adapter->config_space = NULL;
+#endif
+ }
+}
+#endif /* !(RHEL_RELEASE_CODE >= RHEL 5.4) */
+
+#ifdef HAVE_PCI_ERS
+void _kc_free_netdev(struct net_device *netdev)
+{
+ struct adapter_struct *adapter = netdev_priv(netdev);
+
+ if (adapter->config_space != NULL)
+ kfree(adapter->config_space);
+#ifdef CONFIG_SYSFS
+ if (netdev->reg_state == NETREG_UNINITIALIZED) {
+ kfree((char *)netdev - netdev->padded);
+ } else {
+ BUG_ON(netdev->reg_state != NETREG_UNREGISTERED);
+ netdev->reg_state = NETREG_RELEASED;
+ class_device_put(&netdev->class_dev);
+ }
+#else
+ kfree((char *)netdev - netdev->padded);
+#endif
+}
+#endif
+
+void *_kc_kmemdup(const void *src, size_t len, unsigned gfp)
+{
+ void *p;
+
+ p = kzalloc(len, gfp);
+ if (p)
+ memcpy(p, src, len);
+ return p;
+}
+#endif /* <= 2.6.19 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21) )
+struct pci_dev *_kc_netdev_to_pdev(struct net_device *netdev)
+{
+ return ((struct adapter_struct *)netdev_priv(netdev))->pdev;
+}
+#endif /* < 2.6.21 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) )
+/* hexdump code taken from lib/hexdump.c */
+static void _kc_hex_dump_to_buffer(const void *buf, size_t len, int rowsize,
+ int groupsize, unsigned char *linebuf,
+ size_t linebuflen, bool ascii)
+{
+ const u8 *ptr = buf;
+ u8 ch;
+ int j, lx = 0;
+ int ascii_column;
+
+ if (rowsize != 16 && rowsize != 32)
+ rowsize = 16;
+
+ if (!len)
+ goto nil;
+ if (len > rowsize) /* limit to one line at a time */
+ len = rowsize;
+ if ((len % groupsize) != 0) /* no mixed size output */
+ groupsize = 1;
+
+ switch (groupsize) {
+ case 8: {
+ const u64 *ptr8 = buf;
+ int ngroups = len / groupsize;
+
+ for (j = 0; j < ngroups; j++)
+ lx += scnprintf((char *)(linebuf + lx), linebuflen - lx,
+ "%s%16.16llx", j ? " " : "",
+ (unsigned long long)*(ptr8 + j));
+ ascii_column = 17 * ngroups + 2;
+ break;
+ }
+
+ case 4: {
+ const u32 *ptr4 = buf;
+ int ngroups = len / groupsize;
+
+ for (j = 0; j < ngroups; j++)
+ lx += scnprintf((char *)(linebuf + lx), linebuflen - lx,
+ "%s%8.8x", j ? " " : "", *(ptr4 + j));
+ ascii_column = 9 * ngroups + 2;
+ break;
+ }
+
+ case 2: {
+ const u16 *ptr2 = buf;
+ int ngroups = len / groupsize;
+
+ for (j = 0; j < ngroups; j++)
+ lx += scnprintf((char *)(linebuf + lx), linebuflen - lx,
+ "%s%4.4x", j ? " " : "", *(ptr2 + j));
+ ascii_column = 5 * ngroups + 2;
+ break;
+ }
+
+ default:
+ for (j = 0; (j < len) && (lx + 3) <= linebuflen; j++) {
+ ch = ptr[j];
+ linebuf[lx++] = hex_asc(ch >> 4);
+ linebuf[lx++] = hex_asc(ch & 0x0f);
+ linebuf[lx++] = ' ';
+ }
+ if (j)
+ lx--;
+
+ ascii_column = 3 * rowsize + 2;
+ break;
+ }
+ if (!ascii)
+ goto nil;
+
+ while (lx < (linebuflen - 1) && lx < (ascii_column - 1))
+ linebuf[lx++] = ' ';
+ for (j = 0; (j < len) && (lx + 2) < linebuflen; j++)
+ linebuf[lx++] = (isascii(ptr[j]) && isprint(ptr[j])) ? ptr[j]
+ : '.';
+nil:
+ linebuf[lx++] = '\0';
+}
+
+void _kc_print_hex_dump(const char *level,
+ const char *prefix_str, int prefix_type,
+ int rowsize, int groupsize,
+ const void *buf, size_t len, bool ascii)
+{
+ const u8 *ptr = buf;
+ int i, linelen, remaining = len;
+ unsigned char linebuf[200];
+
+ if (rowsize != 16 && rowsize != 32)
+ rowsize = 16;
+
+ for (i = 0; i < len; i += rowsize) {
+ linelen = min(remaining, rowsize);
+ remaining -= rowsize;
+ _kc_hex_dump_to_buffer(ptr + i, linelen, rowsize, groupsize,
+ linebuf, sizeof(linebuf), ascii);
+
+ switch (prefix_type) {
+ case DUMP_PREFIX_ADDRESS:
+ printk("%s%s%*p: %s\n", level, prefix_str,
+ (int)(2 * sizeof(void *)), ptr + i, linebuf);
+ break;
+ case DUMP_PREFIX_OFFSET:
+ printk("%s%s%.8x: %s\n", level, prefix_str, i, linebuf);
+ break;
+ default:
+ printk("%s%s%s\n", level, prefix_str, linebuf);
+ break;
+ }
+ }
+}
+
+#ifdef HAVE_I2C_SUPPORT
+struct i2c_client *
+_kc_i2c_new_device(struct i2c_adapter *adap, struct i2c_board_info const *info)
+{
+ struct i2c_client *client;
+ int status;
+
+ client = kzalloc(sizeof *client, GFP_KERNEL);
+ if (!client)
+ return NULL;
+
+ client->adapter = adap;
+
+ client->dev.platform_data = info->platform_data;
+
+ client->flags = info->flags;
+ client->addr = info->addr;
+
+ strlcpy(client->name, info->type, sizeof(client->name));
+
+ /* Check for address business */
+ status = i2c_check_addr(adap, client->addr);
+ if (status)
+ goto out_err;
+
+ client->dev.parent = &client->adapter->dev;
+ client->dev.bus = &i2c_bus_type;
+
+ status = i2c_attach_client(client);
+ if (status)
+ goto out_err;
+
+ dev_dbg(&adap->dev, "client [%s] registered with bus id %s\n",
+ client->name, dev_name(&client->dev));
+
+ return client;
+
+out_err:
+ dev_err(&adap->dev, "Failed to register i2c client %s at 0x%02x "
+ "(%d)\n", client->name, client->addr, status);
+ kfree(client);
+ return NULL;
+}
+#endif /* HAVE_I2C_SUPPORT */
+#endif /* < 2.6.22 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) )
+#ifdef NAPI
+struct net_device *napi_to_poll_dev(const struct napi_struct *napi)
+{
+ struct adapter_q_vector *q_vector = container_of(napi,
+ struct adapter_q_vector,
+ napi);
+ return &q_vector->poll_dev;
+}
+
+int __kc_adapter_clean(struct net_device *netdev, int *budget)
+{
+ int work_done;
+ int work_to_do = min(*budget, netdev->quota);
+ /* kcompat.h netif_napi_add puts napi struct in "fake netdev->priv" */
+ struct napi_struct *napi = netdev->priv;
+ work_done = napi->poll(napi, work_to_do);
+ *budget -= work_done;
+ netdev->quota -= work_done;
+ return (work_done >= work_to_do) ? 1 : 0;
+}
+#endif /* NAPI */
+#endif /* <= 2.6.24 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26) )
+void _kc_pci_disable_link_state(struct pci_dev *pdev, int state)
+{
+ struct pci_dev *parent = pdev->bus->self;
+ u16 link_state;
+ int pos;
+
+ if (!parent)
+ return;
+
+ pos = pci_find_capability(parent, PCI_CAP_ID_EXP);
+ if (pos) {
+ pci_read_config_word(parent, pos + PCI_EXP_LNKCTL, &link_state);
+ link_state &= ~state;
+ pci_write_config_word(parent, pos + PCI_EXP_LNKCTL, link_state);
+ }
+}
+#endif /* < 2.6.26 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27) )
+#ifdef HAVE_TX_MQ
+void _kc_netif_tx_stop_all_queues(struct net_device *netdev)
+{
+ struct adapter_struct *adapter = netdev_priv(netdev);
+ int i;
+
+ netif_stop_queue(netdev);
+ if (netif_is_multiqueue(netdev))
+ for (i = 0; i < adapter->num_tx_queues; i++)
+ netif_stop_subqueue(netdev, i);
+}
+void _kc_netif_tx_wake_all_queues(struct net_device *netdev)
+{
+ struct adapter_struct *adapter = netdev_priv(netdev);
+ int i;
+
+ netif_wake_queue(netdev);
+ if (netif_is_multiqueue(netdev))
+ for (i = 0; i < adapter->num_tx_queues; i++)
+ netif_wake_subqueue(netdev, i);
+}
+void _kc_netif_tx_start_all_queues(struct net_device *netdev)
+{
+ struct adapter_struct *adapter = netdev_priv(netdev);
+ int i;
+
+ netif_start_queue(netdev);
+ if (netif_is_multiqueue(netdev))
+ for (i = 0; i < adapter->num_tx_queues; i++)
+ netif_start_subqueue(netdev, i);
+}
+#endif /* HAVE_TX_MQ */
+
+#ifndef __WARN_printf
+void __kc_warn_slowpath(const char *file, int line, const char *fmt, ...)
+{
+ va_list args;
+
+ printk(KERN_WARNING "------------[ cut here ]------------\n");
+ printk(KERN_WARNING "WARNING: at %s:%d %s()\n", file, line);
+ va_start(args, fmt);
+ vprintk(fmt, args);
+ va_end(args);
+
+ dump_stack();
+}
+#endif /* __WARN_printf */
+#endif /* < 2.6.27 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) )
+
+int
+_kc_pci_prepare_to_sleep(struct pci_dev *dev)
+{
+ pci_power_t target_state;
+ int error;
+
+ target_state = pci_choose_state(dev, PMSG_SUSPEND);
+
+ pci_enable_wake(dev, target_state, true);
+
+ error = pci_set_power_state(dev, target_state);
+
+ if (error)
+ pci_enable_wake(dev, target_state, false);
+
+ return error;
+}
+
+int
+_kc_pci_wake_from_d3(struct pci_dev *dev, bool enable)
+{
+ int err;
+
+ err = pci_enable_wake(dev, PCI_D3cold, enable);
+ if (err)
+ goto out;
+
+ err = pci_enable_wake(dev, PCI_D3hot, enable);
+
+out:
+ return err;
+}
+#endif /* < 2.6.28 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,29) )
+static void __kc_pci_set_master(struct pci_dev *pdev, bool enable)
+{
+ u16 old_cmd, cmd;
+
+ pci_read_config_word(pdev, PCI_COMMAND, &old_cmd);
+ if (enable)
+ cmd = old_cmd | PCI_COMMAND_MASTER;
+ else
+ cmd = old_cmd & ~PCI_COMMAND_MASTER;
+ if (cmd != old_cmd) {
+ dev_dbg(pci_dev_to_dev(pdev), "%s bus mastering\n",
+ enable ? "enabling" : "disabling");
+ pci_write_config_word(pdev, PCI_COMMAND, cmd);
+ }
+#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,7) )
+ pdev->is_busmaster = enable;
+#endif
+}
+
+void _kc_pci_clear_master(struct pci_dev *dev)
+{
+ __kc_pci_set_master(dev, false);
+}
+#endif /* < 2.6.29 */
+
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,34) )
+#if (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(6,0))
+int _kc_pci_num_vf(struct pci_dev *dev)
+{
+ int num_vf = 0;
+#ifdef CONFIG_PCI_IOV
+ struct pci_dev *vfdev;
+
+ /* loop through all ethernet devices starting at PF dev */
+ vfdev = pci_get_class(PCI_CLASS_NETWORK_ETHERNET << 8, NULL);
+ while (vfdev) {
+ if (vfdev->is_virtfn && vfdev->physfn == dev)
+ num_vf++;
+
+ vfdev = pci_get_class(PCI_CLASS_NETWORK_ETHERNET << 8, vfdev);
+ }
+
+#endif
+ return num_vf;
+}
+#endif /* RHEL_RELEASE_CODE */
+#endif /* < 2.6.34 */
+
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35) )
+#ifdef HAVE_TX_MQ
+#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,0)))
+#ifndef CONFIG_NETDEVICES_MULTIQUEUE
+void _kc_netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
+{
+ unsigned int real_num = dev->real_num_tx_queues;
+ struct Qdisc *qdisc;
+ int i;
+
+ if (unlikely(txq > dev->num_tx_queues))
+ ;
+ else if (txq > real_num)
+ dev->real_num_tx_queues = txq;
+ else if ( txq < real_num) {
+ dev->real_num_tx_queues = txq;
+ for (i = txq; i < dev->num_tx_queues; i++) {
+ qdisc = netdev_get_tx_queue(dev, i)->qdisc;
+ if (qdisc) {
+ spin_lock_bh(qdisc_lock(qdisc));
+ qdisc_reset(qdisc);
+ spin_unlock_bh(qdisc_lock(qdisc));
+ }
+ }
+ }
+}
+#endif /* CONFIG_NETDEVICES_MULTIQUEUE */
+#endif /* !(RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,0)) */
+#endif /* HAVE_TX_MQ */
+
+ssize_t _kc_simple_write_to_buffer(void *to, size_t available, loff_t *ppos,
+ const void __user *from, size_t count)
+{
+ loff_t pos = *ppos;
+ size_t res;
+
+ if (pos < 0)
+ return -EINVAL;
+ if (pos >= available || !count)
+ return 0;
+ if (count > available - pos)
+ count = available - pos;
+ res = copy_from_user(to + pos, from, count);
+ if (res == count)
+ return -EFAULT;
+ count -= res;
+ *ppos = pos + count;
+ return count;
+}
+
+#endif /* < 2.6.35 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36) )
+static const u32 _kc_flags_dup_features =
+ (ETH_FLAG_LRO | ETH_FLAG_NTUPLE | ETH_FLAG_RXHASH);
+
+u32 _kc_ethtool_op_get_flags(struct net_device *dev)
+{
+ return dev->features & _kc_flags_dup_features;
+}
+
+int _kc_ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported)
+{
+ if (data & ~supported)
+ return -EINVAL;
+
+ dev->features = ((dev->features & ~_kc_flags_dup_features) |
+ (data & _kc_flags_dup_features));
+ return 0;
+}
+#endif /* < 2.6.36 */
+
+/******************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39) )
+#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(6,0)))
+
+
+
+#endif /* !(RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(6,0)) */
+#endif /* < 2.6.39 */
+
+/******************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0) )
+void _kc_skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page,
+ int off, int size, unsigned int truesize)
+{
+ skb_fill_page_desc(skb, i, page, off, size);
+ skb->len += size;
+ skb->data_len += size;
+ skb->truesize += truesize;
+}
+
+int _kc_simple_open(struct inode *inode, struct file *file)
+{
+ if (inode->i_private)
+ file->private_data = inode->i_private;
+
+ return 0;
+}
+
+#endif /* < 3.4.0 */
+
+/******************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,7,0) )
+#if !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,3,0)) && \
+ !(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,5))
+static inline int __kc_pcie_cap_version(struct pci_dev *dev)
+{
+ int pos;
+ u16 reg16;
+
+ pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
+ if (!pos)
+ return 0;
+ pci_read_config_word(dev, pos + PCI_EXP_FLAGS, &reg16);
+ return reg16 & PCI_EXP_FLAGS_VERS;
+}
+
+static inline bool __kc_pcie_cap_has_devctl(const struct pci_dev __always_unused *dev)
+{
+ return true;
+}
+
+static inline bool __kc_pcie_cap_has_lnkctl(struct pci_dev *dev)
+{
+ int type = pci_pcie_type(dev);
+
+ return __kc_pcie_cap_version(dev) > 1 ||
+ type == PCI_EXP_TYPE_ROOT_PORT ||
+ type == PCI_EXP_TYPE_ENDPOINT ||
+ type == PCI_EXP_TYPE_LEG_END;
+}
+
+static inline bool __kc_pcie_cap_has_sltctl(struct pci_dev *dev)
+{
+ int type = pci_pcie_type(dev);
+ int pos;
+ u16 pcie_flags_reg;
+
+ pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
+ if (!pos)
+ return 0;
+ pci_read_config_word(dev, pos + PCI_EXP_FLAGS, &pcie_flags_reg);
+
+ return __kc_pcie_cap_version(dev) > 1 ||
+ type == PCI_EXP_TYPE_ROOT_PORT ||
+ (type == PCI_EXP_TYPE_DOWNSTREAM &&
+ pcie_flags_reg & PCI_EXP_FLAGS_SLOT);
+}
+
+static inline bool __kc_pcie_cap_has_rtctl(struct pci_dev *dev)
+{
+ int type = pci_pcie_type(dev);
+
+ return __kc_pcie_cap_version(dev) > 1 ||
+ type == PCI_EXP_TYPE_ROOT_PORT ||
+ type == PCI_EXP_TYPE_RC_EC;
+}
+
+static bool __kc_pcie_capability_reg_implemented(struct pci_dev *dev, int pos)
+{
+ if (!pci_is_pcie(dev))
+ return false;
+
+ switch (pos) {
+ case PCI_EXP_FLAGS_TYPE:
+ return true;
+ case PCI_EXP_DEVCAP:
+ case PCI_EXP_DEVCTL:
+ case PCI_EXP_DEVSTA:
+ return __kc_pcie_cap_has_devctl(dev);
+ case PCI_EXP_LNKCAP:
+ case PCI_EXP_LNKCTL:
+ case PCI_EXP_LNKSTA:
+ return __kc_pcie_cap_has_lnkctl(dev);
+ case PCI_EXP_SLTCAP:
+ case PCI_EXP_SLTCTL:
+ case PCI_EXP_SLTSTA:
+ return __kc_pcie_cap_has_sltctl(dev);
+ case PCI_EXP_RTCTL:
+ case PCI_EXP_RTCAP:
+ case PCI_EXP_RTSTA:
+ return __kc_pcie_cap_has_rtctl(dev);
+ case PCI_EXP_DEVCAP2:
+ case PCI_EXP_DEVCTL2:
+ case PCI_EXP_LNKCAP2:
+ case PCI_EXP_LNKCTL2:
+ case PCI_EXP_LNKSTA2:
+ return __kc_pcie_cap_version(dev) > 1;
+ default:
+ return false;
+ }
+}
+
+/*
+ * Note that these accessor functions are only for the "PCI Express
+ * Capability" (see PCIe spec r3.0, sec 7.8). They do not apply to the
+ * other "PCI Express Extended Capabilities" (AER, VC, ACS, MFVC, etc.)
+ */
+int __kc_pcie_capability_read_word(struct pci_dev *dev, int pos, u16 *val)
+{
+ int ret;
+
+ *val = 0;
+ if (pos & 1)
+ return -EINVAL;
+
+ if (__kc_pcie_capability_reg_implemented(dev, pos)) {
+ ret = pci_read_config_word(dev, pci_pcie_cap(dev) + pos, val);
+ /*
+ * Reset *val to 0 if pci_read_config_word() fails, it may
+ * have been written as 0xFFFF if hardware error happens
+ * during pci_read_config_word().
+ */
+ if (ret)
+ *val = 0;
+ return ret;
+ }
+
+ /*
+ * For Functions that do not implement the Slot Capabilities,
+ * Slot Status, and Slot Control registers, these spaces must
+ * be hardwired to 0b, with the exception of the Presence Detect
+ * State bit in the Slot Status register of Downstream Ports,
+ * which must be hardwired to 1b. (PCIe Base Spec 3.0, sec 7.8)
+ */
+ if (pci_is_pcie(dev) && pos == PCI_EXP_SLTSTA &&
+ pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM) {
+ *val = PCI_EXP_SLTSTA_PDS;
+ }
+
+ return 0;
+}
+
+int __kc_pcie_capability_write_word(struct pci_dev *dev, int pos, u16 val)
+{
+ if (pos & 1)
+ return -EINVAL;
+
+ if (!__kc_pcie_capability_reg_implemented(dev, pos))
+ return 0;
+
+ return pci_write_config_word(dev, pci_pcie_cap(dev) + pos, val);
+}
+
+int __kc_pcie_capability_clear_and_set_word(struct pci_dev *dev, int pos,
+ u16 clear, u16 set)
+{
+ int ret;
+ u16 val;
+
+ ret = __kc_pcie_capability_read_word(dev, pos, &val);
+ if (!ret) {
+ val &= ~clear;
+ val |= set;
+ ret = __kc_pcie_capability_write_word(dev, pos, val);
+ }
+
+ return ret;
+}
+#endif /* !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,3,0)) && \
+ !(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,5)) */
+#endif /* < 3.7.0 */
+
+/******************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,9,0) )
+#endif /* 3.9.0 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) )
+#ifdef CONFIG_PCI_IOV
+int __kc_pci_vfs_assigned(struct pci_dev *dev)
+{
+ unsigned int vfs_assigned = 0;
+#ifdef HAVE_PCI_DEV_FLAGS_ASSIGNED
+ int pos;
+ struct pci_dev *vfdev;
+ unsigned short dev_id;
+
+ /* only search if we are a PF */
+ if (!dev->is_physfn)
+ return 0;
+
+ /* find SR-IOV capability */
+ pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV);
+ if (!pos)
+ return 0;
+
+ /*
+ * determine the device ID for the VFs, the vendor ID will be the
+ * same as the PF so there is no need to check for that one
+ */
+ pci_read_config_word(dev, pos + PCI_SRIOV_VF_DID, &dev_id);
+
+ /* loop through all the VFs to see if we own any that are assigned */
+ vfdev = pci_get_device(dev->vendor, dev_id, NULL);
+ while (vfdev) {
+ /*
+ * It is considered assigned if it is a virtual function with
+ * our dev as the physical function and the assigned bit is set
+ */
+ if (vfdev->is_virtfn && (vfdev->physfn == dev) &&
+ (vfdev->dev_flags & PCI_DEV_FLAGS_ASSIGNED))
+ vfs_assigned++;
+
+ vfdev = pci_get_device(dev->vendor, dev_id, vfdev);
+ }
+
+#endif /* HAVE_PCI_DEV_FLAGS_ASSIGNED */
+ return vfs_assigned;
+}
+
+#endif /* CONFIG_PCI_IOV */
+#endif /* 3.10.0 */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h
new file mode 100644
index 00000000..e2cf71e0
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h
@@ -0,0 +1,3918 @@
+/*******************************************************************************
+
+ Intel(R) Gigabit Ethernet Linux driver
+ Copyright(c) 2007-2013 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _KCOMPAT_H_
+#define _KCOMPAT_H_
+
+#ifndef LINUX_VERSION_CODE
+#include <linux/version.h>
+#else
+#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c))
+#endif
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/ioport.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/delay.h>
+#include <linux/sched.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <linux/mii.h>
+#include <linux/vmalloc.h>
+#include <asm/io.h>
+#include <linux/ethtool.h>
+#include <linux/if_vlan.h>
+
+/* NAPI enable/disable flags here */
+#define NAPI
+
+#define adapter_struct igb_adapter
+#define adapter_q_vector igb_q_vector
+#define NAPI
+
+/* and finally set defines so that the code sees the changes */
+#ifdef NAPI
+#else
+#endif /* NAPI */
+
+/* packet split disable/enable */
+#ifdef DISABLE_PACKET_SPLIT
+#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
+#define CONFIG_IGB_DISABLE_PACKET_SPLIT
+#endif
+#endif /* DISABLE_PACKET_SPLIT */
+
+/* MSI compatibility code for all kernels and drivers */
+#ifdef DISABLE_PCI_MSI
+#undef CONFIG_PCI_MSI
+#endif
+#ifndef CONFIG_PCI_MSI
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,8) )
+struct msix_entry {
+ u16 vector; /* kernel uses to write allocated vector */
+ u16 entry; /* driver uses to specify entry, OS writes */
+};
+#endif
+#undef pci_enable_msi
+#define pci_enable_msi(a) -ENOTSUPP
+#undef pci_disable_msi
+#define pci_disable_msi(a) do {} while (0)
+#undef pci_enable_msix
+#define pci_enable_msix(a, b, c) -ENOTSUPP
+#undef pci_disable_msix
+#define pci_disable_msix(a) do {} while (0)
+#define msi_remove_pci_irq_vectors(a) do {} while (0)
+#endif /* CONFIG_PCI_MSI */
+#ifdef DISABLE_PM
+#undef CONFIG_PM
+#endif
+
+#ifdef DISABLE_NET_POLL_CONTROLLER
+#undef CONFIG_NET_POLL_CONTROLLER
+#endif
+
+#ifndef PMSG_SUSPEND
+#define PMSG_SUSPEND 3
+#endif
+
+/* generic boolean compatibility */
+#undef TRUE
+#undef FALSE
+#define TRUE true
+#define FALSE false
+#ifdef GCC_VERSION
+#if ( GCC_VERSION < 3000 )
+#define _Bool char
+#endif
+#else
+#define _Bool char
+#endif
+
+/* kernels less than 2.4.14 don't have this */
+#ifndef ETH_P_8021Q
+#define ETH_P_8021Q 0x8100
+#endif
+
+#ifndef module_param
+#define module_param(v,t,p) MODULE_PARM(v, "i");
+#endif
+
+#ifndef DMA_64BIT_MASK
+#define DMA_64BIT_MASK 0xffffffffffffffffULL
+#endif
+
+#ifndef DMA_32BIT_MASK
+#define DMA_32BIT_MASK 0x00000000ffffffffULL
+#endif
+
+#ifndef PCI_CAP_ID_EXP
+#define PCI_CAP_ID_EXP 0x10
+#endif
+
+#ifndef PCIE_LINK_STATE_L0S
+#define PCIE_LINK_STATE_L0S 1
+#endif
+#ifndef PCIE_LINK_STATE_L1
+#define PCIE_LINK_STATE_L1 2
+#endif
+
+#ifndef mmiowb
+#ifdef CONFIG_IA64
+#define mmiowb() asm volatile ("mf.a" ::: "memory")
+#else
+#define mmiowb()
+#endif
+#endif
+
+#ifndef SET_NETDEV_DEV
+#define SET_NETDEV_DEV(net, pdev)
+#endif
+
+#if !defined(HAVE_FREE_NETDEV) && ( LINUX_VERSION_CODE < KERNEL_VERSION(3,1,0) )
+#define free_netdev(x) kfree(x)
+#endif
+
+#ifdef HAVE_POLL_CONTROLLER
+#define CONFIG_NET_POLL_CONTROLLER
+#endif
+
+#ifndef SKB_DATAREF_SHIFT
+/* if we do not have the infrastructure to detect if skb_header is cloned
+ just return false in all cases */
+#define skb_header_cloned(x) 0
+#endif
+
+#ifndef NETIF_F_GSO
+#define gso_size tso_size
+#define gso_segs tso_segs
+#endif
+
+#ifndef NETIF_F_GRO
+#define vlan_gro_receive(_napi, _vlgrp, _vlan, _skb) \
+ vlan_hwaccel_receive_skb(_skb, _vlgrp, _vlan)
+#define napi_gro_receive(_napi, _skb) netif_receive_skb(_skb)
+#endif
+
+#ifndef NETIF_F_SCTP_CSUM
+#define NETIF_F_SCTP_CSUM 0
+#endif
+
+#ifndef NETIF_F_LRO
+#define NETIF_F_LRO (1 << 15)
+#endif
+
+#ifndef NETIF_F_NTUPLE
+#define NETIF_F_NTUPLE (1 << 27)
+#endif
+
+#ifndef IPPROTO_SCTP
+#define IPPROTO_SCTP 132
+#endif
+
+#ifndef CHECKSUM_PARTIAL
+#define CHECKSUM_PARTIAL CHECKSUM_HW
+#define CHECKSUM_COMPLETE CHECKSUM_HW
+#endif
+
+#ifndef __read_mostly
+#define __read_mostly
+#endif
+
+#ifndef MII_RESV1
+#define MII_RESV1 0x17 /* Reserved... */
+#endif
+
+#ifndef unlikely
+#define unlikely(_x) _x
+#define likely(_x) _x
+#endif
+
+#ifndef WARN_ON
+#define WARN_ON(x)
+#endif
+
+#ifndef PCI_DEVICE
+#define PCI_DEVICE(vend,dev) \
+ .vendor = (vend), .device = (dev), \
+ .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID
+#endif
+
+#ifndef node_online
+#define node_online(node) ((node) == 0)
+#endif
+
+#ifndef num_online_cpus
+#define num_online_cpus() smp_num_cpus
+#endif
+
+#ifndef cpu_online
+#define cpu_online(cpuid) test_bit((cpuid), &cpu_online_map)
+#endif
+
+#ifndef _LINUX_RANDOM_H
+#include <linux/random.h>
+#endif
+
+#ifndef DECLARE_BITMAP
+#ifndef BITS_TO_LONGS
+#define BITS_TO_LONGS(bits) (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG)
+#endif
+#define DECLARE_BITMAP(name,bits) long name[BITS_TO_LONGS(bits)]
+#endif
+
+#ifndef VLAN_HLEN
+#define VLAN_HLEN 4
+#endif
+
+#ifndef VLAN_ETH_HLEN
+#define VLAN_ETH_HLEN 18
+#endif
+
+#ifndef VLAN_ETH_FRAME_LEN
+#define VLAN_ETH_FRAME_LEN 1518
+#endif
+
+#if !defined(IXGBE_DCA) && !defined(IGB_DCA)
+#define dca_get_tag(b) 0
+#define dca_add_requester(a) -1
+#define dca_remove_requester(b) do { } while(0)
+#define DCA_PROVIDER_ADD 0x0001
+#define DCA_PROVIDER_REMOVE 0x0002
+#endif
+
+#ifndef DCA_GET_TAG_TWO_ARGS
+#define dca3_get_tag(a,b) dca_get_tag(b)
+#endif
+
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+#if defined(__i386__) || defined(__x86_64__)
+#define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+#endif
+#endif
+
+/* taken from 2.6.24 definition in linux/kernel.h */
+#ifndef IS_ALIGNED
+#define IS_ALIGNED(x,a) (((x) % ((typeof(x))(a))) == 0)
+#endif
+
+#ifdef IS_ENABLED
+#undef IS_ENABLED
+#undef __ARG_PLACEHOLDER_1
+#undef config_enabled
+#undef _config_enabled
+#undef __config_enabled
+#undef ___config_enabled
+#endif
+
+#define __ARG_PLACEHOLDER_1 0,
+#define config_enabled(cfg) _config_enabled(cfg)
+#define _config_enabled(value) __config_enabled(__ARG_PLACEHOLDER_##value)
+#define __config_enabled(arg1_or_junk) ___config_enabled(arg1_or_junk 1, 0)
+#define ___config_enabled(__ignored, val, ...) val
+
+#define IS_ENABLED(option) \
+ (config_enabled(option) || config_enabled(option##_MODULE))
+
+#if !defined(NETIF_F_HW_VLAN_TX) && !defined(NETIF_F_HW_VLAN_CTAG_TX)
+struct _kc_vlan_ethhdr {
+ unsigned char h_dest[ETH_ALEN];
+ unsigned char h_source[ETH_ALEN];
+ __be16 h_vlan_proto;
+ __be16 h_vlan_TCI;
+ __be16 h_vlan_encapsulated_proto;
+};
+#define vlan_ethhdr _kc_vlan_ethhdr
+struct _kc_vlan_hdr {
+ __be16 h_vlan_TCI;
+ __be16 h_vlan_encapsulated_proto;
+};
+#define vlan_hdr _kc_vlan_hdr
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) )
+#define vlan_tx_tag_present(_skb) 0
+#define vlan_tx_tag_get(_skb) 0
+#endif
+#endif /* NETIF_F_HW_VLAN_TX && NETIF_F_HW_VLAN_CTAG_TX */
+
+#ifndef VLAN_PRIO_SHIFT
+#define VLAN_PRIO_SHIFT 13
+#endif
+
+
+#ifndef __GFP_COLD
+#define __GFP_COLD 0
+#endif
+
+#ifndef __GFP_COMP
+#define __GFP_COMP 0
+#endif
+
+/*****************************************************************************/
+/* Installations with ethtool version without eeprom, adapter id, or statistics
+ * support */
+
+#ifndef ETH_GSTRING_LEN
+#define ETH_GSTRING_LEN 32
+#endif
+
+#ifndef ETHTOOL_GSTATS
+#define ETHTOOL_GSTATS 0x1d
+#undef ethtool_drvinfo
+#define ethtool_drvinfo k_ethtool_drvinfo
+struct k_ethtool_drvinfo {
+ u32 cmd;
+ char driver[32];
+ char version[32];
+ char fw_version[32];
+ char bus_info[32];
+ char reserved1[32];
+ char reserved2[16];
+ u32 n_stats;
+ u32 testinfo_len;
+ u32 eedump_len;
+ u32 regdump_len;
+};
+
+struct ethtool_stats {
+ u32 cmd;
+ u32 n_stats;
+ u64 data[0];
+};
+#endif /* ETHTOOL_GSTATS */
+
+#ifndef ETHTOOL_PHYS_ID
+#define ETHTOOL_PHYS_ID 0x1c
+#endif /* ETHTOOL_PHYS_ID */
+
+#ifndef ETHTOOL_GSTRINGS
+#define ETHTOOL_GSTRINGS 0x1b
+enum ethtool_stringset {
+ ETH_SS_TEST = 0,
+ ETH_SS_STATS,
+};
+struct ethtool_gstrings {
+ u32 cmd; /* ETHTOOL_GSTRINGS */
+ u32 string_set; /* string set id e.c. ETH_SS_TEST, etc*/
+ u32 len; /* number of strings in the string set */
+ u8 data[0];
+};
+#endif /* ETHTOOL_GSTRINGS */
+
+#ifndef ETHTOOL_TEST
+#define ETHTOOL_TEST 0x1a
+enum ethtool_test_flags {
+ ETH_TEST_FL_OFFLINE = (1 << 0),
+ ETH_TEST_FL_FAILED = (1 << 1),
+};
+struct ethtool_test {
+ u32 cmd;
+ u32 flags;
+ u32 reserved;
+ u32 len;
+ u64 data[0];
+};
+#endif /* ETHTOOL_TEST */
+
+#ifndef ETHTOOL_GEEPROM
+#define ETHTOOL_GEEPROM 0xb
+#undef ETHTOOL_GREGS
+struct ethtool_eeprom {
+ u32 cmd;
+ u32 magic;
+ u32 offset;
+ u32 len;
+ u8 data[0];
+};
+
+struct ethtool_value {
+ u32 cmd;
+ u32 data;
+};
+#endif /* ETHTOOL_GEEPROM */
+
+#ifndef ETHTOOL_GLINK
+#define ETHTOOL_GLINK 0xa
+#endif /* ETHTOOL_GLINK */
+
+#ifndef ETHTOOL_GWOL
+#define ETHTOOL_GWOL 0x5
+#define ETHTOOL_SWOL 0x6
+#define SOPASS_MAX 6
+struct ethtool_wolinfo {
+ u32 cmd;
+ u32 supported;
+ u32 wolopts;
+ u8 sopass[SOPASS_MAX]; /* SecureOn(tm) password */
+};
+#endif /* ETHTOOL_GWOL */
+
+#ifndef ETHTOOL_GREGS
+#define ETHTOOL_GREGS 0x00000004 /* Get NIC registers */
+#define ethtool_regs _kc_ethtool_regs
+/* for passing big chunks of data */
+struct _kc_ethtool_regs {
+ u32 cmd;
+ u32 version; /* driver-specific, indicates different chips/revs */
+ u32 len; /* bytes */
+ u8 data[0];
+};
+#endif /* ETHTOOL_GREGS */
+
+#ifndef ETHTOOL_GMSGLVL
+#define ETHTOOL_GMSGLVL 0x00000007 /* Get driver message level */
+#endif
+#ifndef ETHTOOL_SMSGLVL
+#define ETHTOOL_SMSGLVL 0x00000008 /* Set driver msg level, priv. */
+#endif
+#ifndef ETHTOOL_NWAY_RST
+#define ETHTOOL_NWAY_RST 0x00000009 /* Restart autonegotiation, priv */
+#endif
+#ifndef ETHTOOL_GLINK
+#define ETHTOOL_GLINK 0x0000000a /* Get link status */
+#endif
+#ifndef ETHTOOL_GEEPROM
+#define ETHTOOL_GEEPROM 0x0000000b /* Get EEPROM data */
+#endif
+#ifndef ETHTOOL_SEEPROM
+#define ETHTOOL_SEEPROM 0x0000000c /* Set EEPROM data */
+#endif
+#ifndef ETHTOOL_GCOALESCE
+#define ETHTOOL_GCOALESCE 0x0000000e /* Get coalesce config */
+/* for configuring coalescing parameters of chip */
+#define ethtool_coalesce _kc_ethtool_coalesce
+struct _kc_ethtool_coalesce {
+ u32 cmd; /* ETHTOOL_{G,S}COALESCE */
+
+ /* How many usecs to delay an RX interrupt after
+ * a packet arrives. If 0, only rx_max_coalesced_frames
+ * is used.
+ */
+ u32 rx_coalesce_usecs;
+
+ /* How many packets to delay an RX interrupt after
+ * a packet arrives. If 0, only rx_coalesce_usecs is
+ * used. It is illegal to set both usecs and max frames
+ * to zero as this would cause RX interrupts to never be
+ * generated.
+ */
+ u32 rx_max_coalesced_frames;
+
+ /* Same as above two parameters, except that these values
+ * apply while an IRQ is being serviced by the host. Not
+ * all cards support this feature and the values are ignored
+ * in that case.
+ */
+ u32 rx_coalesce_usecs_irq;
+ u32 rx_max_coalesced_frames_irq;
+
+ /* How many usecs to delay a TX interrupt after
+ * a packet is sent. If 0, only tx_max_coalesced_frames
+ * is used.
+ */
+ u32 tx_coalesce_usecs;
+
+ /* How many packets to delay a TX interrupt after
+ * a packet is sent. If 0, only tx_coalesce_usecs is
+ * used. It is illegal to set both usecs and max frames
+ * to zero as this would cause TX interrupts to never be
+ * generated.
+ */
+ u32 tx_max_coalesced_frames;
+
+ /* Same as above two parameters, except that these values
+ * apply while an IRQ is being serviced by the host. Not
+ * all cards support this feature and the values are ignored
+ * in that case.
+ */
+ u32 tx_coalesce_usecs_irq;
+ u32 tx_max_coalesced_frames_irq;
+
+ /* How many usecs to delay in-memory statistics
+ * block updates. Some drivers do not have an in-memory
+ * statistic block, and in such cases this value is ignored.
+ * This value must not be zero.
+ */
+ u32 stats_block_coalesce_usecs;
+
+ /* Adaptive RX/TX coalescing is an algorithm implemented by
+ * some drivers to improve latency under low packet rates and
+ * improve throughput under high packet rates. Some drivers
+ * only implement one of RX or TX adaptive coalescing. Anything
+ * not implemented by the driver causes these values to be
+ * silently ignored.
+ */
+ u32 use_adaptive_rx_coalesce;
+ u32 use_adaptive_tx_coalesce;
+
+ /* When the packet rate (measured in packets per second)
+ * is below pkt_rate_low, the {rx,tx}_*_low parameters are
+ * used.
+ */
+ u32 pkt_rate_low;
+ u32 rx_coalesce_usecs_low;
+ u32 rx_max_coalesced_frames_low;
+ u32 tx_coalesce_usecs_low;
+ u32 tx_max_coalesced_frames_low;
+
+ /* When the packet rate is below pkt_rate_high but above
+ * pkt_rate_low (both measured in packets per second) the
+ * normal {rx,tx}_* coalescing parameters are used.
+ */
+
+ /* When the packet rate is (measured in packets per second)
+ * is above pkt_rate_high, the {rx,tx}_*_high parameters are
+ * used.
+ */
+ u32 pkt_rate_high;
+ u32 rx_coalesce_usecs_high;
+ u32 rx_max_coalesced_frames_high;
+ u32 tx_coalesce_usecs_high;
+ u32 tx_max_coalesced_frames_high;
+
+ /* How often to do adaptive coalescing packet rate sampling,
+ * measured in seconds. Must not be zero.
+ */
+ u32 rate_sample_interval;
+};
+#endif /* ETHTOOL_GCOALESCE */
+
+#ifndef ETHTOOL_SCOALESCE
+#define ETHTOOL_SCOALESCE 0x0000000f /* Set coalesce config. */
+#endif
+#ifndef ETHTOOL_GRINGPARAM
+#define ETHTOOL_GRINGPARAM 0x00000010 /* Get ring parameters */
+/* for configuring RX/TX ring parameters */
+#define ethtool_ringparam _kc_ethtool_ringparam
+struct _kc_ethtool_ringparam {
+ u32 cmd; /* ETHTOOL_{G,S}RINGPARAM */
+
+ /* Read only attributes. These indicate the maximum number
+ * of pending RX/TX ring entries the driver will allow the
+ * user to set.
+ */
+ u32 rx_max_pending;
+ u32 rx_mini_max_pending;
+ u32 rx_jumbo_max_pending;
+ u32 tx_max_pending;
+
+ /* Values changeable by the user. The valid values are
+ * in the range 1 to the "*_max_pending" counterpart above.
+ */
+ u32 rx_pending;
+ u32 rx_mini_pending;
+ u32 rx_jumbo_pending;
+ u32 tx_pending;
+};
+#endif /* ETHTOOL_GRINGPARAM */
+
+#ifndef ETHTOOL_SRINGPARAM
+#define ETHTOOL_SRINGPARAM 0x00000011 /* Set ring parameters, priv. */
+#endif
+#ifndef ETHTOOL_GPAUSEPARAM
+#define ETHTOOL_GPAUSEPARAM 0x00000012 /* Get pause parameters */
+/* for configuring link flow control parameters */
+#define ethtool_pauseparam _kc_ethtool_pauseparam
+struct _kc_ethtool_pauseparam {
+ u32 cmd; /* ETHTOOL_{G,S}PAUSEPARAM */
+
+ /* If the link is being auto-negotiated (via ethtool_cmd.autoneg
+ * being true) the user may set 'autoneg' here non-zero to have the
+ * pause parameters be auto-negotiated too. In such a case, the
+ * {rx,tx}_pause values below determine what capabilities are
+ * advertised.
+ *
+ * If 'autoneg' is zero or the link is not being auto-negotiated,
+ * then {rx,tx}_pause force the driver to use/not-use pause
+ * flow control.
+ */
+ u32 autoneg;
+ u32 rx_pause;
+ u32 tx_pause;
+};
+#endif /* ETHTOOL_GPAUSEPARAM */
+
+#ifndef ETHTOOL_SPAUSEPARAM
+#define ETHTOOL_SPAUSEPARAM 0x00000013 /* Set pause parameters. */
+#endif
+#ifndef ETHTOOL_GRXCSUM
+#define ETHTOOL_GRXCSUM 0x00000014 /* Get RX hw csum enable (ethtool_value) */
+#endif
+#ifndef ETHTOOL_SRXCSUM
+#define ETHTOOL_SRXCSUM 0x00000015 /* Set RX hw csum enable (ethtool_value) */
+#endif
+#ifndef ETHTOOL_GTXCSUM
+#define ETHTOOL_GTXCSUM 0x00000016 /* Get TX hw csum enable (ethtool_value) */
+#endif
+#ifndef ETHTOOL_STXCSUM
+#define ETHTOOL_STXCSUM 0x00000017 /* Set TX hw csum enable (ethtool_value) */
+#endif
+#ifndef ETHTOOL_GSG
+#define ETHTOOL_GSG 0x00000018 /* Get scatter-gather enable
+ * (ethtool_value) */
+#endif
+#ifndef ETHTOOL_SSG
+#define ETHTOOL_SSG 0x00000019 /* Set scatter-gather enable
+ * (ethtool_value). */
+#endif
+#ifndef ETHTOOL_TEST
+#define ETHTOOL_TEST 0x0000001a /* execute NIC self-test, priv. */
+#endif
+#ifndef ETHTOOL_GSTRINGS
+#define ETHTOOL_GSTRINGS 0x0000001b /* get specified string set */
+#endif
+#ifndef ETHTOOL_PHYS_ID
+#define ETHTOOL_PHYS_ID 0x0000001c /* identify the NIC */
+#endif
+#ifndef ETHTOOL_GSTATS
+#define ETHTOOL_GSTATS 0x0000001d /* get NIC-specific statistics */
+#endif
+#ifndef ETHTOOL_GTSO
+#define ETHTOOL_GTSO 0x0000001e /* Get TSO enable (ethtool_value) */
+#endif
+#ifndef ETHTOOL_STSO
+#define ETHTOOL_STSO 0x0000001f /* Set TSO enable (ethtool_value) */
+#endif
+
+#ifndef ETHTOOL_BUSINFO_LEN
+#define ETHTOOL_BUSINFO_LEN 32
+#endif
+
+#ifndef RHEL_RELEASE_VERSION
+#define RHEL_RELEASE_VERSION(a,b) (((a) << 8) + (b))
+#endif
+#ifndef AX_RELEASE_VERSION
+#define AX_RELEASE_VERSION(a,b) (((a) << 8) + (b))
+#endif
+
+#ifndef AX_RELEASE_CODE
+#define AX_RELEASE_CODE 0
+#endif
+
+#if (AX_RELEASE_CODE && AX_RELEASE_CODE == AX_RELEASE_VERSION(3,0))
+#define RHEL_RELEASE_CODE RHEL_RELEASE_VERSION(5,0)
+#elif (AX_RELEASE_CODE && AX_RELEASE_CODE == AX_RELEASE_VERSION(3,1))
+#define RHEL_RELEASE_CODE RHEL_RELEASE_VERSION(5,1)
+#elif (AX_RELEASE_CODE && AX_RELEASE_CODE == AX_RELEASE_VERSION(3,2))
+#define RHEL_RELEASE_CODE RHEL_RELEASE_VERSION(5,3)
+#endif
+
+#ifndef RHEL_RELEASE_CODE
+/* NOTE: RHEL_RELEASE_* introduced in RHEL4.5 */
+#define RHEL_RELEASE_CODE 0
+#endif
+
+/* SuSE version macro is the same as Linux kernel version */
+#ifndef SLE_VERSION
+#define SLE_VERSION(a,b,c) KERNEL_VERSION(a,b,c)
+#endif
+#ifdef CONFIG_SUSE_KERNEL
+#if ( LINUX_VERSION_CODE == KERNEL_VERSION(2,6,27) )
+/* SLES11 GA is 2.6.27 based */
+#define SLE_VERSION_CODE SLE_VERSION(11,0,0)
+#elif ( LINUX_VERSION_CODE == KERNEL_VERSION(2,6,32) )
+/* SLES11 SP1 is 2.6.32 based */
+#define SLE_VERSION_CODE SLE_VERSION(11,1,0)
+#elif ((LINUX_VERSION_CODE >= KERNEL_VERSION(3,0,61)) && \
+ (LINUX_VERSION_CODE < KERNEL_VERSION(3,1,0)))
+/* SLES11 SP3 is at least 3.0.61+ based */
+#define SLE_VERSION_CODE SLE_VERSION(11,3,0)
+#elif ( LINUX_VERSION_CODE >= KERNEL_VERSION(3,12,28) )
+/* SLES12 is at least 3.12.28+ based */
+#define SLE_VERSION_CODE SLE_VERSION(12,0,0)
+#endif /* LINUX_VERSION_CODE == KERNEL_VERSION(x,y,z) */
+#endif /* CONFIG_SUSE_KERNEL */
+#ifndef SLE_VERSION_CODE
+#define SLE_VERSION_CODE 0
+#endif /* SLE_VERSION_CODE */
+
+/* Ubuntu release and kernel codes must be specified from Makefile */
+#ifndef UBUNTU_RELEASE_VERSION
+#define UBUNTU_RELEASE_VERSION(a,b) (((a) * 100) + (b))
+#endif
+#ifndef UBUNTU_KERNEL_VERSION
+#define UBUNTU_KERNEL_VERSION(a,b,c,abi,upload) (((a) << 40) + ((b) << 32) + ((c) << 24) + ((abi) << 8) + (upload))
+#endif
+#ifndef UBUNTU_RELEASE_CODE
+#define UBUNTU_RELEASE_CODE 0
+#endif
+#ifndef UBUNTU_KERNEL_CODE
+#define UBUNTU_KERNEL_CODE 0
+#endif
+
+#ifdef __KLOCWORK__
+#ifdef ARRAY_SIZE
+#undef ARRAY_SIZE
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+#endif /* __KLOCWORK__ */
+
+/*****************************************************************************/
+/* 2.4.3 => 2.4.0 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,3) )
+
+/**************************************/
+/* PCI DRIVER API */
+
+#ifndef pci_set_dma_mask
+#define pci_set_dma_mask _kc_pci_set_dma_mask
+extern int _kc_pci_set_dma_mask(struct pci_dev *dev, dma_addr_t mask);
+#endif
+
+#ifndef pci_request_regions
+#define pci_request_regions _kc_pci_request_regions
+extern int _kc_pci_request_regions(struct pci_dev *pdev, char *res_name);
+#endif
+
+#ifndef pci_release_regions
+#define pci_release_regions _kc_pci_release_regions
+extern void _kc_pci_release_regions(struct pci_dev *pdev);
+#endif
+
+/**************************************/
+/* NETWORK DRIVER API */
+
+#ifndef alloc_etherdev
+#define alloc_etherdev _kc_alloc_etherdev
+extern struct net_device * _kc_alloc_etherdev(int sizeof_priv);
+#endif
+
+#ifndef is_valid_ether_addr
+#define is_valid_ether_addr _kc_is_valid_ether_addr
+extern int _kc_is_valid_ether_addr(u8 *addr);
+#endif
+
+/**************************************/
+/* MISCELLANEOUS */
+
+#ifndef INIT_TQUEUE
+#define INIT_TQUEUE(_tq, _routine, _data) \
+ do { \
+ INIT_LIST_HEAD(&(_tq)->list); \
+ (_tq)->sync = 0; \
+ (_tq)->routine = _routine; \
+ (_tq)->data = _data; \
+ } while (0)
+#endif
+
+#endif /* 2.4.3 => 2.4.0 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,5) )
+/* Generic MII registers. */
+#define MII_BMCR 0x00 /* Basic mode control register */
+#define MII_BMSR 0x01 /* Basic mode status register */
+#define MII_PHYSID1 0x02 /* PHYS ID 1 */
+#define MII_PHYSID2 0x03 /* PHYS ID 2 */
+#define MII_ADVERTISE 0x04 /* Advertisement control reg */
+#define MII_LPA 0x05 /* Link partner ability reg */
+#define MII_EXPANSION 0x06 /* Expansion register */
+/* Basic mode control register. */
+#define BMCR_FULLDPLX 0x0100 /* Full duplex */
+#define BMCR_ANENABLE 0x1000 /* Enable auto negotiation */
+/* Basic mode status register. */
+#define BMSR_ERCAP 0x0001 /* Ext-reg capability */
+#define BMSR_ANEGCAPABLE 0x0008 /* Able to do auto-negotiation */
+#define BMSR_10HALF 0x0800 /* Can do 10mbps, half-duplex */
+#define BMSR_10FULL 0x1000 /* Can do 10mbps, full-duplex */
+#define BMSR_100HALF 0x2000 /* Can do 100mbps, half-duplex */
+#define BMSR_100FULL 0x4000 /* Can do 100mbps, full-duplex */
+/* Advertisement control register. */
+#define ADVERTISE_CSMA 0x0001 /* Only selector supported */
+#define ADVERTISE_10HALF 0x0020 /* Try for 10mbps half-duplex */
+#define ADVERTISE_10FULL 0x0040 /* Try for 10mbps full-duplex */
+#define ADVERTISE_100HALF 0x0080 /* Try for 100mbps half-duplex */
+#define ADVERTISE_100FULL 0x0100 /* Try for 100mbps full-duplex */
+#define ADVERTISE_ALL (ADVERTISE_10HALF | ADVERTISE_10FULL | \
+ ADVERTISE_100HALF | ADVERTISE_100FULL)
+/* Expansion register for auto-negotiation. */
+#define EXPANSION_ENABLENPAGE 0x0004 /* This enables npage words */
+#endif
+
+/*****************************************************************************/
+/* 2.4.6 => 2.4.3 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,6) )
+
+#ifndef pci_set_power_state
+#define pci_set_power_state _kc_pci_set_power_state
+extern int _kc_pci_set_power_state(struct pci_dev *dev, int state);
+#endif
+
+#ifndef pci_enable_wake
+#define pci_enable_wake _kc_pci_enable_wake
+extern int _kc_pci_enable_wake(struct pci_dev *pdev, u32 state, int enable);
+#endif
+
+#ifndef pci_disable_device
+#define pci_disable_device _kc_pci_disable_device
+extern void _kc_pci_disable_device(struct pci_dev *pdev);
+#endif
+
+/* PCI PM entry point syntax changed, so don't support suspend/resume */
+#undef CONFIG_PM
+
+#endif /* 2.4.6 => 2.4.3 */
+
+#ifndef HAVE_PCI_SET_MWI
+#define pci_set_mwi(X) pci_write_config_word(X, \
+ PCI_COMMAND, adapter->hw.bus.pci_cmd_word | \
+ PCI_COMMAND_INVALIDATE);
+#define pci_clear_mwi(X) pci_write_config_word(X, \
+ PCI_COMMAND, adapter->hw.bus.pci_cmd_word & \
+ ~PCI_COMMAND_INVALIDATE);
+#endif
+
+/*****************************************************************************/
+/* 2.4.10 => 2.4.9 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,10) )
+
+/**************************************/
+/* MODULE API */
+
+#ifndef MODULE_LICENSE
+ #define MODULE_LICENSE(X)
+#endif
+
+/**************************************/
+/* OTHER */
+
+#undef min
+#define min(x,y) ({ \
+ const typeof(x) _x = (x); \
+ const typeof(y) _y = (y); \
+ (void) (&_x == &_y); \
+ _x < _y ? _x : _y; })
+
+#undef max
+#define max(x,y) ({ \
+ const typeof(x) _x = (x); \
+ const typeof(y) _y = (y); \
+ (void) (&_x == &_y); \
+ _x > _y ? _x : _y; })
+
+#define min_t(type,x,y) ({ \
+ type _x = (x); \
+ type _y = (y); \
+ _x < _y ? _x : _y; })
+
+#define max_t(type,x,y) ({ \
+ type _x = (x); \
+ type _y = (y); \
+ _x > _y ? _x : _y; })
+
+#ifndef list_for_each_safe
+#define list_for_each_safe(pos, n, head) \
+ for (pos = (head)->next, n = pos->next; pos != (head); \
+ pos = n, n = pos->next)
+#endif
+
+#ifndef ____cacheline_aligned_in_smp
+#ifdef CONFIG_SMP
+#define ____cacheline_aligned_in_smp ____cacheline_aligned
+#else
+#define ____cacheline_aligned_in_smp
+#endif /* CONFIG_SMP */
+#endif
+
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,8) )
+extern int _kc_snprintf(char * buf, size_t size, const char *fmt, ...);
+#define snprintf(buf, size, fmt, args...) _kc_snprintf(buf, size, fmt, ##args)
+extern int _kc_vsnprintf(char *buf, size_t size, const char *fmt, va_list args);
+#define vsnprintf(buf, size, fmt, args) _kc_vsnprintf(buf, size, fmt, args)
+#else /* 2.4.8 => 2.4.9 */
+extern int snprintf(char * buf, size_t size, const char *fmt, ...);
+extern int vsnprintf(char *buf, size_t size, const char *fmt, va_list args);
+#endif
+#endif /* 2.4.10 -> 2.4.6 */
+
+
+/*****************************************************************************/
+/* 2.4.12 => 2.4.10 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,12) )
+#ifndef HAVE_NETIF_MSG
+#define HAVE_NETIF_MSG 1
+enum {
+ NETIF_MSG_DRV = 0x0001,
+ NETIF_MSG_PROBE = 0x0002,
+ NETIF_MSG_LINK = 0x0004,
+ NETIF_MSG_TIMER = 0x0008,
+ NETIF_MSG_IFDOWN = 0x0010,
+ NETIF_MSG_IFUP = 0x0020,
+ NETIF_MSG_RX_ERR = 0x0040,
+ NETIF_MSG_TX_ERR = 0x0080,
+ NETIF_MSG_TX_QUEUED = 0x0100,
+ NETIF_MSG_INTR = 0x0200,
+ NETIF_MSG_TX_DONE = 0x0400,
+ NETIF_MSG_RX_STATUS = 0x0800,
+ NETIF_MSG_PKTDATA = 0x1000,
+ NETIF_MSG_HW = 0x2000,
+ NETIF_MSG_WOL = 0x4000,
+};
+
+#define netif_msg_drv(p) ((p)->msg_enable & NETIF_MSG_DRV)
+#define netif_msg_probe(p) ((p)->msg_enable & NETIF_MSG_PROBE)
+#define netif_msg_link(p) ((p)->msg_enable & NETIF_MSG_LINK)
+#define netif_msg_timer(p) ((p)->msg_enable & NETIF_MSG_TIMER)
+#define netif_msg_ifdown(p) ((p)->msg_enable & NETIF_MSG_IFDOWN)
+#define netif_msg_ifup(p) ((p)->msg_enable & NETIF_MSG_IFUP)
+#define netif_msg_rx_err(p) ((p)->msg_enable & NETIF_MSG_RX_ERR)
+#define netif_msg_tx_err(p) ((p)->msg_enable & NETIF_MSG_TX_ERR)
+#define netif_msg_tx_queued(p) ((p)->msg_enable & NETIF_MSG_TX_QUEUED)
+#define netif_msg_intr(p) ((p)->msg_enable & NETIF_MSG_INTR)
+#define netif_msg_tx_done(p) ((p)->msg_enable & NETIF_MSG_TX_DONE)
+#define netif_msg_rx_status(p) ((p)->msg_enable & NETIF_MSG_RX_STATUS)
+#define netif_msg_pktdata(p) ((p)->msg_enable & NETIF_MSG_PKTDATA)
+#endif /* !HAVE_NETIF_MSG */
+#endif /* 2.4.12 => 2.4.10 */
+
+/*****************************************************************************/
+/* 2.4.13 => 2.4.12 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,13) )
+
+/**************************************/
+/* PCI DMA MAPPING */
+
+#ifndef virt_to_page
+ #define virt_to_page(v) (mem_map + (virt_to_phys(v) >> PAGE_SHIFT))
+#endif
+
+#ifndef pci_map_page
+#define pci_map_page _kc_pci_map_page
+extern u64 _kc_pci_map_page(struct pci_dev *dev, struct page *page, unsigned long offset, size_t size, int direction);
+#endif
+
+#ifndef pci_unmap_page
+#define pci_unmap_page _kc_pci_unmap_page
+extern void _kc_pci_unmap_page(struct pci_dev *dev, u64 dma_addr, size_t size, int direction);
+#endif
+
+/* pci_set_dma_mask takes dma_addr_t, which is only 32-bits prior to 2.4.13 */
+
+#undef DMA_32BIT_MASK
+#define DMA_32BIT_MASK 0xffffffff
+#undef DMA_64BIT_MASK
+#define DMA_64BIT_MASK 0xffffffff
+
+/**************************************/
+/* OTHER */
+
+#ifndef cpu_relax
+#define cpu_relax() rep_nop()
+#endif
+
+struct vlan_ethhdr {
+ unsigned char h_dest[ETH_ALEN];
+ unsigned char h_source[ETH_ALEN];
+ unsigned short h_vlan_proto;
+ unsigned short h_vlan_TCI;
+ unsigned short h_vlan_encapsulated_proto;
+};
+#endif /* 2.4.13 => 2.4.12 */
+
+/*****************************************************************************/
+/* 2.4.17 => 2.4.12 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,17) )
+
+#ifndef __devexit_p
+ #define __devexit_p(x) &(x)
+#endif
+
+#else
+ /* For Kernel 3.8 these are not defined - so undefine all */
+ #undef __devexit_p
+ #undef __devexit
+ #undef __devinit
+ #undef __devinitdata
+ #define __devexit_p(x) &(x)
+ #define __devexit
+ #define __devinit
+ #define __devinitdata
+
+#endif /* 2.4.17 => 2.4.13 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,18) )
+#define NETIF_MSG_HW 0x2000
+#define NETIF_MSG_WOL 0x4000
+
+#ifndef netif_msg_hw
+#define netif_msg_hw(p) ((p)->msg_enable & NETIF_MSG_HW)
+#endif
+#ifndef netif_msg_wol
+#define netif_msg_wol(p) ((p)->msg_enable & NETIF_MSG_WOL)
+#endif
+#endif /* 2.4.18 */
+
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* 2.4.20 => 2.4.19 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20) )
+
+/* we won't support NAPI on less than 2.4.20 */
+#ifdef NAPI
+#undef NAPI
+#endif
+
+#endif /* 2.4.20 => 2.4.19 */
+
+/*****************************************************************************/
+/* 2.4.22 => 2.4.17 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,22) )
+#define pci_name(x) ((x)->slot_name)
+
+#ifndef SUPPORTED_10000baseT_Full
+#define SUPPORTED_10000baseT_Full (1 << 12)
+#endif
+#ifndef ADVERTISED_10000baseT_Full
+#define ADVERTISED_10000baseT_Full (1 << 12)
+#endif
+#endif
+
+/*****************************************************************************/
+/* 2.4.22 => 2.4.17 */
+
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,22) )
+#ifndef IGB_NO_LRO
+#define IGB_NO_LRO
+#endif
+#endif
+
+/*****************************************************************************/
+/*****************************************************************************/
+/* 2.4.23 => 2.4.22 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,23) )
+/*****************************************************************************/
+#ifdef NAPI
+#ifndef netif_poll_disable
+#define netif_poll_disable(x) _kc_netif_poll_disable(x)
+static inline void _kc_netif_poll_disable(struct net_device *netdev)
+{
+ while (test_and_set_bit(__LINK_STATE_RX_SCHED, &netdev->state)) {
+ /* No hurry */
+ current->state = TASK_INTERRUPTIBLE;
+ schedule_timeout(1);
+ }
+}
+#endif
+#ifndef netif_poll_enable
+#define netif_poll_enable(x) _kc_netif_poll_enable(x)
+static inline void _kc_netif_poll_enable(struct net_device *netdev)
+{
+ clear_bit(__LINK_STATE_RX_SCHED, &netdev->state);
+}
+#endif
+#endif /* NAPI */
+#ifndef netif_tx_disable
+#define netif_tx_disable(x) _kc_netif_tx_disable(x)
+static inline void _kc_netif_tx_disable(struct net_device *dev)
+{
+ spin_lock_bh(&dev->xmit_lock);
+ netif_stop_queue(dev);
+ spin_unlock_bh(&dev->xmit_lock);
+}
+#endif
+#else /* 2.4.23 => 2.4.22 */
+#define HAVE_SCTP
+#endif /* 2.4.23 => 2.4.22 */
+
+/*****************************************************************************/
+/* 2.6.4 => 2.6.0 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,25) || \
+ ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) && \
+ LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) ) )
+#define ETHTOOL_OPS_COMPAT
+#endif /* 2.6.4 => 2.6.0 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,27) )
+#define __user
+#endif /* < 2.4.27 */
+
+/*****************************************************************************/
+/* 2.5.71 => 2.4.x */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,5,71) )
+#define sk_protocol protocol
+#define pci_get_device pci_find_device
+#endif /* 2.5.70 => 2.4.x */
+
+/*****************************************************************************/
+/* < 2.4.27 or 2.6.0 <= 2.6.5 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,27) || \
+ ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) && \
+ LINUX_VERSION_CODE < KERNEL_VERSION(2,6,5) ) )
+
+#ifndef netif_msg_init
+#define netif_msg_init _kc_netif_msg_init
+static inline u32 _kc_netif_msg_init(int debug_value, int default_msg_enable_bits)
+{
+ /* use default */
+ if (debug_value < 0 || debug_value >= (sizeof(u32) * 8))
+ return default_msg_enable_bits;
+ if (debug_value == 0) /* no output */
+ return 0;
+ /* set low N bits */
+ return (1 << debug_value) -1;
+}
+#endif
+
+#endif /* < 2.4.27 or 2.6.0 <= 2.6.5 */
+/*****************************************************************************/
+#if (( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,27) ) || \
+ (( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) ) && \
+ ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,3) )))
+#define netdev_priv(x) x->priv
+#endif
+
+/*****************************************************************************/
+/* <= 2.5.0 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) )
+#include <linux/rtnetlink.h>
+#undef pci_register_driver
+#define pci_register_driver pci_module_init
+
+/*
+ * Most of the dma compat code is copied/modifed from the 2.4.37
+ * /include/linux/libata-compat.h header file
+ */
+/* These definitions mirror those in pci.h, so they can be used
+ * interchangeably with their PCI_ counterparts */
+enum dma_data_direction {
+ DMA_BIDIRECTIONAL = 0,
+ DMA_TO_DEVICE = 1,
+ DMA_FROM_DEVICE = 2,
+ DMA_NONE = 3,
+};
+
+struct device {
+ struct pci_dev pdev;
+};
+
+static inline struct pci_dev *to_pci_dev (struct device *dev)
+{
+ return (struct pci_dev *) dev;
+}
+static inline struct device *pci_dev_to_dev(struct pci_dev *pdev)
+{
+ return (struct device *) pdev;
+}
+
+#define pdev_printk(lvl, pdev, fmt, args...) \
+ printk("%s %s: " fmt, lvl, pci_name(pdev), ## args)
+#define dev_err(dev, fmt, args...) \
+ pdev_printk(KERN_ERR, to_pci_dev(dev), fmt, ## args)
+#define dev_info(dev, fmt, args...) \
+ pdev_printk(KERN_INFO, to_pci_dev(dev), fmt, ## args)
+#define dev_warn(dev, fmt, args...) \
+ pdev_printk(KERN_WARNING, to_pci_dev(dev), fmt, ## args)
+#define dev_notice(dev, fmt, args...) \
+ pdev_printk(KERN_NOTICE, to_pci_dev(dev), fmt, ## args)
+#define dev_dbg(dev, fmt, args...) \
+ pdev_printk(KERN_DEBUG, to_pci_dev(dev), fmt, ## args)
+
+/* NOTE: dangerous! we ignore the 'gfp' argument */
+#define dma_alloc_coherent(dev,sz,dma,gfp) \
+ pci_alloc_consistent(to_pci_dev(dev),(sz),(dma))
+#define dma_free_coherent(dev,sz,addr,dma_addr) \
+ pci_free_consistent(to_pci_dev(dev),(sz),(addr),(dma_addr))
+
+#define dma_map_page(dev,a,b,c,d) \
+ pci_map_page(to_pci_dev(dev),(a),(b),(c),(d))
+#define dma_unmap_page(dev,a,b,c) \
+ pci_unmap_page(to_pci_dev(dev),(a),(b),(c))
+
+#define dma_map_single(dev,a,b,c) \
+ pci_map_single(to_pci_dev(dev),(a),(b),(c))
+#define dma_unmap_single(dev,a,b,c) \
+ pci_unmap_single(to_pci_dev(dev),(a),(b),(c))
+
+#define dma_map_sg(dev, sg, nents, dir) \
+ pci_map_sg(to_pci_dev(dev), (sg), (nents), (dir)
+#define dma_unmap_sg(dev, sg, nents, dir) \
+ pci_unmap_sg(to_pci_dev(dev), (sg), (nents), (dir)
+
+#define dma_sync_single(dev,a,b,c) \
+ pci_dma_sync_single(to_pci_dev(dev),(a),(b),(c))
+
+/* for range just sync everything, that's all the pci API can do */
+#define dma_sync_single_range(dev,addr,off,sz,dir) \
+ pci_dma_sync_single(to_pci_dev(dev),(addr),(off)+(sz),(dir))
+
+#define dma_set_mask(dev,mask) \
+ pci_set_dma_mask(to_pci_dev(dev),(mask))
+
+/* hlist_* code - double linked lists */
+struct hlist_head {
+ struct hlist_node *first;
+};
+
+struct hlist_node {
+ struct hlist_node *next, **pprev;
+};
+
+static inline void __hlist_del(struct hlist_node *n)
+{
+ struct hlist_node *next = n->next;
+ struct hlist_node **pprev = n->pprev;
+ *pprev = next;
+ if (next)
+ next->pprev = pprev;
+}
+
+static inline void hlist_del(struct hlist_node *n)
+{
+ __hlist_del(n);
+ n->next = NULL;
+ n->pprev = NULL;
+}
+
+static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h)
+{
+ struct hlist_node *first = h->first;
+ n->next = first;
+ if (first)
+ first->pprev = &n->next;
+ h->first = n;
+ n->pprev = &h->first;
+}
+
+static inline int hlist_empty(const struct hlist_head *h)
+{
+ return !h->first;
+}
+#define HLIST_HEAD_INIT { .first = NULL }
+#define HLIST_HEAD(name) struct hlist_head name = { .first = NULL }
+#define INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL)
+static inline void INIT_HLIST_NODE(struct hlist_node *h)
+{
+ h->next = NULL;
+ h->pprev = NULL;
+}
+
+#ifndef might_sleep
+#define might_sleep()
+#endif
+#else
+static inline struct device *pci_dev_to_dev(struct pci_dev *pdev)
+{
+ return &pdev->dev;
+}
+#endif /* <= 2.5.0 */
+
+/*****************************************************************************/
+/* 2.5.28 => 2.4.23 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,5,28) )
+
+#include <linux/tqueue.h>
+#define work_struct tq_struct
+#undef INIT_WORK
+#define INIT_WORK(a,b) INIT_TQUEUE(a,(void (*)(void *))b,a)
+#undef container_of
+#define container_of list_entry
+#define schedule_work schedule_task
+#define flush_scheduled_work flush_scheduled_tasks
+#define cancel_work_sync(x) flush_scheduled_work()
+
+#endif /* 2.5.28 => 2.4.17 */
+
+/*****************************************************************************/
+/* 2.6.0 => 2.5.28 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) )
+#ifndef read_barrier_depends
+#define read_barrier_depends() rmb()
+#endif
+
+#undef get_cpu
+#define get_cpu() smp_processor_id()
+#undef put_cpu
+#define put_cpu() do { } while(0)
+#define MODULE_INFO(version, _version)
+#ifndef CONFIG_E1000_DISABLE_PACKET_SPLIT
+#define CONFIG_E1000_DISABLE_PACKET_SPLIT 1
+#endif
+#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
+#define CONFIG_IGB_DISABLE_PACKET_SPLIT 1
+#endif
+
+#define dma_set_coherent_mask(dev,mask) 1
+
+#undef dev_put
+#define dev_put(dev) __dev_put(dev)
+
+#ifndef skb_fill_page_desc
+#define skb_fill_page_desc _kc_skb_fill_page_desc
+extern void _kc_skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page, int off, int size);
+#endif
+
+#undef ALIGN
+#define ALIGN(x,a) (((x)+(a)-1)&~((a)-1))
+
+#ifndef page_count
+#define page_count(p) atomic_read(&(p)->count)
+#endif
+
+#ifdef MAX_NUMNODES
+#undef MAX_NUMNODES
+#endif
+#define MAX_NUMNODES 1
+
+/* find_first_bit and find_next bit are not defined for most
+ * 2.4 kernels (except for the redhat 2.4.21 kernels
+ */
+#include <linux/bitops.h>
+#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG)
+#undef find_next_bit
+#define find_next_bit _kc_find_next_bit
+extern unsigned long _kc_find_next_bit(const unsigned long *addr,
+ unsigned long size,
+ unsigned long offset);
+#define find_first_bit(addr, size) find_next_bit((addr), (size), 0)
+
+
+#ifndef netdev_name
+static inline const char *_kc_netdev_name(const struct net_device *dev)
+{
+ if (strchr(dev->name, '%'))
+ return "(unregistered net_device)";
+ return dev->name;
+}
+#define netdev_name(netdev) _kc_netdev_name(netdev)
+#endif /* netdev_name */
+
+#ifndef strlcpy
+#define strlcpy _kc_strlcpy
+extern size_t _kc_strlcpy(char *dest, const char *src, size_t size);
+#endif /* strlcpy */
+
+#ifndef do_div
+#if BITS_PER_LONG == 64
+# define do_div(n,base) ({ \
+ uint32_t __base = (base); \
+ uint32_t __rem; \
+ __rem = ((uint64_t)(n)) % __base; \
+ (n) = ((uint64_t)(n)) / __base; \
+ __rem; \
+ })
+#elif BITS_PER_LONG == 32
+extern uint32_t _kc__div64_32(uint64_t *dividend, uint32_t divisor);
+# define do_div(n,base) ({ \
+ uint32_t __base = (base); \
+ uint32_t __rem; \
+ if (likely(((n) >> 32) == 0)) { \
+ __rem = (uint32_t)(n) % __base; \
+ (n) = (uint32_t)(n) / __base; \
+ } else \
+ __rem = _kc__div64_32(&(n), __base); \
+ __rem; \
+ })
+#else /* BITS_PER_LONG == ?? */
+# error do_div() does not yet support the C64
+#endif /* BITS_PER_LONG */
+#endif /* do_div */
+
+#ifndef NSEC_PER_SEC
+#define NSEC_PER_SEC 1000000000L
+#endif
+
+#undef HAVE_I2C_SUPPORT
+#else /* 2.6.0 */
+#if IS_ENABLED(CONFIG_I2C_ALGOBIT) && \
+ (RHEL_RELEASE_CODE && (RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(4,9)))
+#define HAVE_I2C_SUPPORT
+#endif /* IS_ENABLED(CONFIG_I2C_ALGOBIT) */
+
+#endif /* 2.6.0 => 2.5.28 */
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,3) )
+#define dma_pool pci_pool
+#define dma_pool_destroy pci_pool_destroy
+#define dma_pool_alloc pci_pool_alloc
+#define dma_pool_free pci_pool_free
+
+#define dma_pool_create(name,dev,size,align,allocation) \
+ pci_pool_create((name),to_pci_dev(dev),(size),(align),(allocation))
+#endif /* < 2.6.3 */
+
+/*****************************************************************************/
+/* 2.6.4 => 2.6.0 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) )
+#define MODULE_VERSION(_version) MODULE_INFO(version, _version)
+#endif /* 2.6.4 => 2.6.0 */
+
+/*****************************************************************************/
+/* 2.6.5 => 2.6.0 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,5) )
+#define dma_sync_single_for_cpu dma_sync_single
+#define dma_sync_single_for_device dma_sync_single
+#define dma_sync_single_range_for_cpu dma_sync_single_range
+#define dma_sync_single_range_for_device dma_sync_single_range
+#ifndef pci_dma_mapping_error
+#define pci_dma_mapping_error _kc_pci_dma_mapping_error
+static inline int _kc_pci_dma_mapping_error(dma_addr_t dma_addr)
+{
+ return dma_addr == 0;
+}
+#endif
+#endif /* 2.6.5 => 2.6.0 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) )
+extern int _kc_scnprintf(char * buf, size_t size, const char *fmt, ...);
+#define scnprintf(buf, size, fmt, args...) _kc_scnprintf(buf, size, fmt, ##args)
+#endif /* < 2.6.4 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,6) )
+/* taken from 2.6 include/linux/bitmap.h */
+#undef bitmap_zero
+#define bitmap_zero _kc_bitmap_zero
+static inline void _kc_bitmap_zero(unsigned long *dst, int nbits)
+{
+ if (nbits <= BITS_PER_LONG)
+ *dst = 0UL;
+ else {
+ int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
+ memset(dst, 0, len);
+ }
+}
+#define random_ether_addr _kc_random_ether_addr
+static inline void _kc_random_ether_addr(u8 *addr)
+{
+ get_random_bytes(addr, ETH_ALEN);
+ addr[0] &= 0xfe; /* clear multicast */
+ addr[0] |= 0x02; /* set local assignment */
+}
+#define page_to_nid(x) 0
+
+#endif /* < 2.6.6 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,7) )
+#undef if_mii
+#define if_mii _kc_if_mii
+static inline struct mii_ioctl_data *_kc_if_mii(struct ifreq *rq)
+{
+ return (struct mii_ioctl_data *) &rq->ifr_ifru;
+}
+
+#ifndef __force
+#define __force
+#endif
+#endif /* < 2.6.7 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,8) )
+#ifndef PCI_EXP_DEVCTL
+#define PCI_EXP_DEVCTL 8
+#endif
+#ifndef PCI_EXP_DEVCTL_CERE
+#define PCI_EXP_DEVCTL_CERE 0x0001
+#endif
+#define PCI_EXP_FLAGS 2 /* Capabilities register */
+#define PCI_EXP_FLAGS_VERS 0x000f /* Capability version */
+#define PCI_EXP_FLAGS_TYPE 0x00f0 /* Device/Port type */
+#define PCI_EXP_TYPE_ENDPOINT 0x0 /* Express Endpoint */
+#define PCI_EXP_TYPE_LEG_END 0x1 /* Legacy Endpoint */
+#define PCI_EXP_TYPE_ROOT_PORT 0x4 /* Root Port */
+#define PCI_EXP_TYPE_DOWNSTREAM 0x6 /* Downstream Port */
+#define PCI_EXP_FLAGS_SLOT 0x0100 /* Slot implemented */
+#define PCI_EXP_DEVCAP 4 /* Device capabilities */
+#define PCI_EXP_DEVSTA 10 /* Device Status */
+#define msleep(x) do { set_current_state(TASK_UNINTERRUPTIBLE); \
+ schedule_timeout((x * HZ)/1000 + 2); \
+ } while (0)
+
+#endif /* < 2.6.8 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,9))
+#include <net/dsfield.h>
+#define __iomem
+
+#ifndef kcalloc
+#define kcalloc(n, size, flags) _kc_kzalloc(((n) * (size)), flags)
+extern void *_kc_kzalloc(size_t size, int flags);
+#endif
+#define MSEC_PER_SEC 1000L
+static inline unsigned int _kc_jiffies_to_msecs(const unsigned long j)
+{
+#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ)
+ return (MSEC_PER_SEC / HZ) * j;
+#elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC)
+ return (j + (HZ / MSEC_PER_SEC) - 1)/(HZ / MSEC_PER_SEC);
+#else
+ return (j * MSEC_PER_SEC) / HZ;
+#endif
+}
+static inline unsigned long _kc_msecs_to_jiffies(const unsigned int m)
+{
+ if (m > _kc_jiffies_to_msecs(MAX_JIFFY_OFFSET))
+ return MAX_JIFFY_OFFSET;
+#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ)
+ return (m + (MSEC_PER_SEC / HZ) - 1) / (MSEC_PER_SEC / HZ);
+#elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC)
+ return m * (HZ / MSEC_PER_SEC);
+#else
+ return (m * HZ + MSEC_PER_SEC - 1) / MSEC_PER_SEC;
+#endif
+}
+
+#define msleep_interruptible _kc_msleep_interruptible
+static inline unsigned long _kc_msleep_interruptible(unsigned int msecs)
+{
+ unsigned long timeout = _kc_msecs_to_jiffies(msecs) + 1;
+
+ while (timeout && !signal_pending(current)) {
+ __set_current_state(TASK_INTERRUPTIBLE);
+ timeout = schedule_timeout(timeout);
+ }
+ return _kc_jiffies_to_msecs(timeout);
+}
+
+/* Basic mode control register. */
+#define BMCR_SPEED1000 0x0040 /* MSB of Speed (1000) */
+
+#ifndef __le16
+#define __le16 u16
+#endif
+#ifndef __le32
+#define __le32 u32
+#endif
+#ifndef __le64
+#define __le64 u64
+#endif
+#ifndef __be16
+#define __be16 u16
+#endif
+#ifndef __be32
+#define __be32 u32
+#endif
+#ifndef __be64
+#define __be64 u64
+#endif
+
+static inline struct vlan_ethhdr *vlan_eth_hdr(const struct sk_buff *skb)
+{
+ return (struct vlan_ethhdr *)skb->mac.raw;
+}
+
+/* Wake-On-Lan options. */
+#define WAKE_PHY (1 << 0)
+#define WAKE_UCAST (1 << 1)
+#define WAKE_MCAST (1 << 2)
+#define WAKE_BCAST (1 << 3)
+#define WAKE_ARP (1 << 4)
+#define WAKE_MAGIC (1 << 5)
+#define WAKE_MAGICSECURE (1 << 6) /* only meaningful if WAKE_MAGIC */
+
+#define skb_header_pointer _kc_skb_header_pointer
+static inline void *_kc_skb_header_pointer(const struct sk_buff *skb,
+ int offset, int len, void *buffer)
+{
+ int hlen = skb_headlen(skb);
+
+ if (hlen - offset >= len)
+ return skb->data + offset;
+
+#ifdef MAX_SKB_FRAGS
+ if (skb_copy_bits(skb, offset, buffer, len) < 0)
+ return NULL;
+
+ return buffer;
+#else
+ return NULL;
+#endif
+
+#ifndef NETDEV_TX_OK
+#define NETDEV_TX_OK 0
+#endif
+#ifndef NETDEV_TX_BUSY
+#define NETDEV_TX_BUSY 1
+#endif
+#ifndef NETDEV_TX_LOCKED
+#define NETDEV_TX_LOCKED -1
+#endif
+}
+
+#ifndef __bitwise
+#define __bitwise
+#endif
+#endif /* < 2.6.9 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,10) )
+#ifdef module_param_array_named
+#undef module_param_array_named
+#define module_param_array_named(name, array, type, nump, perm) \
+ static struct kparam_array __param_arr_##name \
+ = { ARRAY_SIZE(array), nump, param_set_##type, param_get_##type, \
+ sizeof(array[0]), array }; \
+ module_param_call(name, param_array_set, param_array_get, \
+ &__param_arr_##name, perm)
+#endif /* module_param_array_named */
+/*
+ * num_online is broken for all < 2.6.10 kernels. This is needed to support
+ * Node module parameter of ixgbe.
+ */
+#undef num_online_nodes
+#define num_online_nodes(n) 1
+extern DECLARE_BITMAP(_kcompat_node_online_map, MAX_NUMNODES);
+#undef node_online_map
+#define node_online_map _kcompat_node_online_map
+#define pci_get_class pci_find_class
+#endif /* < 2.6.10 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,11) )
+#define PCI_D0 0
+#define PCI_D1 1
+#define PCI_D2 2
+#define PCI_D3hot 3
+#define PCI_D3cold 4
+typedef int pci_power_t;
+#define pci_choose_state(pdev,state) state
+#define PMSG_SUSPEND 3
+#define PCI_EXP_LNKCTL 16
+
+#undef NETIF_F_LLTX
+
+#ifndef ARCH_HAS_PREFETCH
+#define prefetch(X)
+#endif
+
+#ifndef NET_IP_ALIGN
+#define NET_IP_ALIGN 2
+#endif
+
+#define KC_USEC_PER_SEC 1000000L
+#define usecs_to_jiffies _kc_usecs_to_jiffies
+static inline unsigned int _kc_jiffies_to_usecs(const unsigned long j)
+{
+#if HZ <= KC_USEC_PER_SEC && !(KC_USEC_PER_SEC % HZ)
+ return (KC_USEC_PER_SEC / HZ) * j;
+#elif HZ > KC_USEC_PER_SEC && !(HZ % KC_USEC_PER_SEC)
+ return (j + (HZ / KC_USEC_PER_SEC) - 1)/(HZ / KC_USEC_PER_SEC);
+#else
+ return (j * KC_USEC_PER_SEC) / HZ;
+#endif
+}
+static inline unsigned long _kc_usecs_to_jiffies(const unsigned int m)
+{
+ if (m > _kc_jiffies_to_usecs(MAX_JIFFY_OFFSET))
+ return MAX_JIFFY_OFFSET;
+#if HZ <= KC_USEC_PER_SEC && !(KC_USEC_PER_SEC % HZ)
+ return (m + (KC_USEC_PER_SEC / HZ) - 1) / (KC_USEC_PER_SEC / HZ);
+#elif HZ > KC_USEC_PER_SEC && !(HZ % KC_USEC_PER_SEC)
+ return m * (HZ / KC_USEC_PER_SEC);
+#else
+ return (m * HZ + KC_USEC_PER_SEC - 1) / KC_USEC_PER_SEC;
+#endif
+}
+
+#define PCI_EXP_LNKCAP 12 /* Link Capabilities */
+#define PCI_EXP_LNKSTA 18 /* Link Status */
+#define PCI_EXP_SLTCAP 20 /* Slot Capabilities */
+#define PCI_EXP_SLTCTL 24 /* Slot Control */
+#define PCI_EXP_SLTSTA 26 /* Slot Status */
+#define PCI_EXP_RTCTL 28 /* Root Control */
+#define PCI_EXP_RTCAP 30 /* Root Capabilities */
+#define PCI_EXP_RTSTA 32 /* Root Status */
+#endif /* < 2.6.11 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,12) )
+#include <linux/reboot.h>
+#define USE_REBOOT_NOTIFIER
+
+/* Generic MII registers. */
+#define MII_CTRL1000 0x09 /* 1000BASE-T control */
+#define MII_STAT1000 0x0a /* 1000BASE-T status */
+/* Advertisement control register. */
+#define ADVERTISE_PAUSE_CAP 0x0400 /* Try for pause */
+#define ADVERTISE_PAUSE_ASYM 0x0800 /* Try for asymmetric pause */
+/* Link partner ability register. */
+#define LPA_PAUSE_CAP 0x0400 /* Can pause */
+#define LPA_PAUSE_ASYM 0x0800 /* Can pause asymetrically */
+/* 1000BASE-T Control register */
+#define ADVERTISE_1000FULL 0x0200 /* Advertise 1000BASE-T full duplex */
+#define ADVERTISE_1000HALF 0x0100 /* Advertise 1000BASE-T half duplex */
+/* 1000BASE-T Status register */
+#define LPA_1000LOCALRXOK 0x2000 /* Link partner local receiver status */
+#define LPA_1000REMRXOK 0x1000 /* Link partner remote receiver status */
+
+#ifndef is_zero_ether_addr
+#define is_zero_ether_addr _kc_is_zero_ether_addr
+static inline int _kc_is_zero_ether_addr(const u8 *addr)
+{
+ return !(addr[0] | addr[1] | addr[2] | addr[3] | addr[4] | addr[5]);
+}
+#endif /* is_zero_ether_addr */
+#ifndef is_multicast_ether_addr
+#define is_multicast_ether_addr _kc_is_multicast_ether_addr
+static inline int _kc_is_multicast_ether_addr(const u8 *addr)
+{
+ return addr[0] & 0x01;
+}
+#endif /* is_multicast_ether_addr */
+#endif /* < 2.6.12 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,13) )
+#ifndef kstrdup
+#define kstrdup _kc_kstrdup
+extern char *_kc_kstrdup(const char *s, unsigned int gfp);
+#endif
+#endif /* < 2.6.13 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14) )
+#define pm_message_t u32
+#ifndef kzalloc
+#define kzalloc _kc_kzalloc
+extern void *_kc_kzalloc(size_t size, int flags);
+#endif
+
+/* Generic MII registers. */
+#define MII_ESTATUS 0x0f /* Extended Status */
+/* Basic mode status register. */
+#define BMSR_ESTATEN 0x0100 /* Extended Status in R15 */
+/* Extended status register. */
+#define ESTATUS_1000_TFULL 0x2000 /* Can do 1000BT Full */
+#define ESTATUS_1000_THALF 0x1000 /* Can do 1000BT Half */
+
+#define SUPPORTED_Pause (1 << 13)
+#define SUPPORTED_Asym_Pause (1 << 14)
+#define ADVERTISED_Pause (1 << 13)
+#define ADVERTISED_Asym_Pause (1 << 14)
+
+#if (!(RHEL_RELEASE_CODE && \
+ (RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(4,3)) && \
+ (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(5,0))))
+#if ((LINUX_VERSION_CODE == KERNEL_VERSION(2,6,9)) && !defined(gfp_t))
+#define gfp_t unsigned
+#else
+typedef unsigned gfp_t;
+#endif
+#endif /* !RHEL4.3->RHEL5.0 */
+
+#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,9) )
+#ifdef CONFIG_X86_64
+#define dma_sync_single_range_for_cpu(dev, addr, off, sz, dir) \
+ dma_sync_single_for_cpu((dev), (addr), (off) + (sz), (dir))
+#define dma_sync_single_range_for_device(dev, addr, off, sz, dir) \
+ dma_sync_single_for_device((dev), (addr), (off) + (sz), (dir))
+#endif
+#endif
+#endif /* < 2.6.14 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,15) )
+#ifndef vmalloc_node
+#define vmalloc_node(a,b) vmalloc(a)
+#endif /* vmalloc_node*/
+
+#define setup_timer(_timer, _function, _data) \
+do { \
+ (_timer)->function = _function; \
+ (_timer)->data = _data; \
+ init_timer(_timer); \
+} while (0)
+#ifndef device_can_wakeup
+#define device_can_wakeup(dev) (1)
+#endif
+#ifndef device_set_wakeup_enable
+#define device_set_wakeup_enable(dev, val) do{}while(0)
+#endif
+#ifndef device_init_wakeup
+#define device_init_wakeup(dev,val) do {} while (0)
+#endif
+static inline unsigned _kc_compare_ether_addr(const u8 *addr1, const u8 *addr2)
+{
+ const u16 *a = (const u16 *) addr1;
+ const u16 *b = (const u16 *) addr2;
+
+ return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) != 0;
+}
+#undef compare_ether_addr
+#define compare_ether_addr(addr1, addr2) _kc_compare_ether_addr(addr1, addr2)
+#endif /* < 2.6.15 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16) )
+#undef DEFINE_MUTEX
+#define DEFINE_MUTEX(x) DECLARE_MUTEX(x)
+#define mutex_lock(x) down_interruptible(x)
+#define mutex_unlock(x) up(x)
+
+#ifndef ____cacheline_internodealigned_in_smp
+#ifdef CONFIG_SMP
+#define ____cacheline_internodealigned_in_smp ____cacheline_aligned_in_smp
+#else
+#define ____cacheline_internodealigned_in_smp
+#endif /* CONFIG_SMP */
+#endif /* ____cacheline_internodealigned_in_smp */
+#undef HAVE_PCI_ERS
+#else /* 2.6.16 and above */
+#undef HAVE_PCI_ERS
+#define HAVE_PCI_ERS
+#if ( SLE_VERSION_CODE && SLE_VERSION_CODE == SLE_VERSION(10,4,0) )
+#ifdef device_can_wakeup
+#undef device_can_wakeup
+#endif /* device_can_wakeup */
+#define device_can_wakeup(dev) 1
+#endif /* SLE_VERSION(10,4,0) */
+#endif /* < 2.6.16 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) )
+#ifndef dev_notice
+#define dev_notice(dev, fmt, args...) \
+ dev_printk(KERN_NOTICE, dev, fmt, ## args)
+#endif
+
+#ifndef first_online_node
+#define first_online_node 0
+#endif
+#ifndef NET_SKB_PAD
+#define NET_SKB_PAD 16
+#endif
+#endif /* < 2.6.17 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) )
+
+#ifndef IRQ_HANDLED
+#define irqreturn_t void
+#define IRQ_HANDLED
+#define IRQ_NONE
+#endif
+
+#ifndef IRQF_PROBE_SHARED
+#ifdef SA_PROBEIRQ
+#define IRQF_PROBE_SHARED SA_PROBEIRQ
+#else
+#define IRQF_PROBE_SHARED 0
+#endif
+#endif
+
+#ifndef IRQF_SHARED
+#define IRQF_SHARED SA_SHIRQ
+#endif
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+#ifndef FIELD_SIZEOF
+#define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
+#endif
+
+#ifndef skb_is_gso
+#ifdef NETIF_F_TSO
+#define skb_is_gso _kc_skb_is_gso
+static inline int _kc_skb_is_gso(const struct sk_buff *skb)
+{
+ return skb_shinfo(skb)->gso_size;
+}
+#else
+#define skb_is_gso(a) 0
+#endif
+#endif
+
+#ifndef resource_size_t
+#define resource_size_t unsigned long
+#endif
+
+#ifdef skb_pad
+#undef skb_pad
+#endif
+#define skb_pad(x,y) _kc_skb_pad(x, y)
+int _kc_skb_pad(struct sk_buff *skb, int pad);
+#ifdef skb_padto
+#undef skb_padto
+#endif
+#define skb_padto(x,y) _kc_skb_padto(x, y)
+static inline int _kc_skb_padto(struct sk_buff *skb, unsigned int len)
+{
+ unsigned int size = skb->len;
+ if(likely(size >= len))
+ return 0;
+ return _kc_skb_pad(skb, len - size);
+}
+
+#ifndef DECLARE_PCI_UNMAP_ADDR
+#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \
+ dma_addr_t ADDR_NAME
+#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) \
+ u32 LEN_NAME
+#define pci_unmap_addr(PTR, ADDR_NAME) \
+ ((PTR)->ADDR_NAME)
+#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) \
+ (((PTR)->ADDR_NAME) = (VAL))
+#define pci_unmap_len(PTR, LEN_NAME) \
+ ((PTR)->LEN_NAME)
+#define pci_unmap_len_set(PTR, LEN_NAME, VAL) \
+ (((PTR)->LEN_NAME) = (VAL))
+#endif /* DECLARE_PCI_UNMAP_ADDR */
+#endif /* < 2.6.18 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) )
+
+#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,0)))
+#define i_private u.generic_ip
+#endif /* >= RHEL 5.0 */
+
+#ifndef DIV_ROUND_UP
+#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
+#endif
+#ifndef __ALIGN_MASK
+#define __ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
+#endif
+#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0) )
+#if (!((RHEL_RELEASE_CODE && \
+ ((RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(4,4) && \
+ RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(5,0)) || \
+ (RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,0))))))
+typedef irqreturn_t (*irq_handler_t)(int, void*, struct pt_regs *);
+#endif
+#if (RHEL_RELEASE_CODE && RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(6,0))
+#undef CONFIG_INET_LRO
+#undef CONFIG_INET_LRO_MODULE
+#ifdef IXGBE_FCOE
+#undef CONFIG_FCOE
+#undef CONFIG_FCOE_MODULE
+#endif /* IXGBE_FCOE */
+#endif
+typedef irqreturn_t (*new_handler_t)(int, void*);
+static inline irqreturn_t _kc_request_irq(unsigned int irq, new_handler_t handler, unsigned long flags, const char *devname, void *dev_id)
+#else /* 2.4.x */
+typedef void (*irq_handler_t)(int, void*, struct pt_regs *);
+typedef void (*new_handler_t)(int, void*);
+static inline int _kc_request_irq(unsigned int irq, new_handler_t handler, unsigned long flags, const char *devname, void *dev_id)
+#endif /* >= 2.5.x */
+{
+ irq_handler_t new_handler = (irq_handler_t) handler;
+ return request_irq(irq, new_handler, flags, devname, dev_id);
+}
+
+#undef request_irq
+#define request_irq(irq, handler, flags, devname, dev_id) _kc_request_irq((irq), (handler), (flags), (devname), (dev_id))
+
+#define irq_handler_t new_handler_t
+/* pci_restore_state and pci_save_state handles MSI/PCIE from 2.6.19 */
+#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,4)))
+#define PCIE_CONFIG_SPACE_LEN 256
+#define PCI_CONFIG_SPACE_LEN 64
+#define PCIE_LINK_STATUS 0x12
+#define pci_config_space_ich8lan() do {} while(0)
+#undef pci_save_state
+extern int _kc_pci_save_state(struct pci_dev *);
+#define pci_save_state(pdev) _kc_pci_save_state(pdev)
+#undef pci_restore_state
+extern void _kc_pci_restore_state(struct pci_dev *);
+#define pci_restore_state(pdev) _kc_pci_restore_state(pdev)
+#endif /* !(RHEL_RELEASE_CODE >= RHEL 5.4) */
+
+#ifdef HAVE_PCI_ERS
+#undef free_netdev
+extern void _kc_free_netdev(struct net_device *);
+#define free_netdev(netdev) _kc_free_netdev(netdev)
+#endif
+static inline int pci_enable_pcie_error_reporting(struct pci_dev *dev)
+{
+ return 0;
+}
+#define pci_disable_pcie_error_reporting(dev) do {} while (0)
+#define pci_cleanup_aer_uncorrect_error_status(dev) do {} while (0)
+
+extern void *_kc_kmemdup(const void *src, size_t len, unsigned gfp);
+#define kmemdup(src, len, gfp) _kc_kmemdup(src, len, gfp)
+#ifndef bool
+#define bool _Bool
+#define true 1
+#define false 0
+#endif
+#else /* 2.6.19 */
+#include <linux/aer.h>
+#include <linux/string.h>
+#endif /* < 2.6.19 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20) )
+#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,28) )
+#undef INIT_WORK
+#define INIT_WORK(_work, _func) \
+do { \
+ INIT_LIST_HEAD(&(_work)->entry); \
+ (_work)->pending = 0; \
+ (_work)->func = (void (*)(void *))_func; \
+ (_work)->data = _work; \
+ init_timer(&(_work)->timer); \
+} while (0)
+#endif
+
+#ifndef PCI_VDEVICE
+#define PCI_VDEVICE(ven, dev) \
+ PCI_VENDOR_ID_##ven, (dev), \
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0
+#endif
+
+#ifndef PCI_VENDOR_ID_INTEL
+#define PCI_VENDOR_ID_INTEL 0x8086
+#endif
+
+#ifndef round_jiffies
+#define round_jiffies(x) x
+#endif
+
+#define csum_offset csum
+
+#define HAVE_EARLY_VMALLOC_NODE
+#define dev_to_node(dev) -1
+#undef set_dev_node
+/* remove compiler warning with b=b, for unused variable */
+#define set_dev_node(a, b) do { (b) = (b); } while(0)
+
+#if (!(RHEL_RELEASE_CODE && \
+ (((RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(4,7)) && \
+ (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(5,0))) || \
+ (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,6)))) && \
+ !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(10,2,0)))
+typedef __u16 __bitwise __sum16;
+typedef __u32 __bitwise __wsum;
+#endif
+
+#if (!(RHEL_RELEASE_CODE && \
+ (((RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(4,7)) && \
+ (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(5,0))) || \
+ (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,4)))) && \
+ !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(10,2,0)))
+static inline __wsum csum_unfold(__sum16 n)
+{
+ return (__force __wsum)n;
+}
+#endif
+
+#else /* < 2.6.20 */
+#define HAVE_DEVICE_NUMA_NODE
+#endif /* < 2.6.20 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21) )
+#define to_net_dev(class) container_of(class, struct net_device, class_dev)
+#define NETDEV_CLASS_DEV
+#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,5)))
+#define vlan_group_get_device(vg, id) (vg->vlan_devices[id])
+#define vlan_group_set_device(vg, id, dev) \
+ do { \
+ if (vg) vg->vlan_devices[id] = dev; \
+ } while (0)
+#endif /* !(RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,5)) */
+#define pci_channel_offline(pdev) (pdev->error_state && \
+ pdev->error_state != pci_channel_io_normal)
+#define pci_request_selected_regions(pdev, bars, name) \
+ pci_request_regions(pdev, name)
+#define pci_release_selected_regions(pdev, bars) pci_release_regions(pdev);
+
+#ifndef __aligned
+#define __aligned(x) __attribute__((aligned(x)))
+#endif
+
+extern struct pci_dev *_kc_netdev_to_pdev(struct net_device *netdev);
+#define netdev_to_dev(netdev) \
+ pci_dev_to_dev(_kc_netdev_to_pdev(netdev))
+#else
+static inline struct device *netdev_to_dev(struct net_device *netdev)
+{
+ return &netdev->dev;
+}
+
+#endif /* < 2.6.21 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) )
+#define tcp_hdr(skb) (skb->h.th)
+#define tcp_hdrlen(skb) (skb->h.th->doff << 2)
+#define skb_transport_offset(skb) (skb->h.raw - skb->data)
+#define skb_transport_header(skb) (skb->h.raw)
+#define ipv6_hdr(skb) (skb->nh.ipv6h)
+#define ip_hdr(skb) (skb->nh.iph)
+#define skb_network_offset(skb) (skb->nh.raw - skb->data)
+#define skb_network_header(skb) (skb->nh.raw)
+#define skb_tail_pointer(skb) skb->tail
+#define skb_reset_tail_pointer(skb) \
+ do { \
+ skb->tail = skb->data; \
+ } while (0)
+#define skb_set_tail_pointer(skb, offset) \
+ do { \
+ skb->tail = skb->data + offset; \
+ } while (0)
+#define skb_copy_to_linear_data(skb, from, len) \
+ memcpy(skb->data, from, len)
+#define skb_copy_to_linear_data_offset(skb, offset, from, len) \
+ memcpy(skb->data + offset, from, len)
+#define skb_network_header_len(skb) (skb->h.raw - skb->nh.raw)
+#define pci_register_driver pci_module_init
+#define skb_mac_header(skb) skb->mac.raw
+
+#ifdef NETIF_F_MULTI_QUEUE
+#ifndef alloc_etherdev_mq
+#define alloc_etherdev_mq(_a, _b) alloc_etherdev(_a)
+#endif
+#endif /* NETIF_F_MULTI_QUEUE */
+
+#ifndef ETH_FCS_LEN
+#define ETH_FCS_LEN 4
+#endif
+#define cancel_work_sync(x) flush_scheduled_work()
+#ifndef udp_hdr
+#define udp_hdr _udp_hdr
+static inline struct udphdr *_udp_hdr(const struct sk_buff *skb)
+{
+ return (struct udphdr *)skb_transport_header(skb);
+}
+#endif
+
+#ifdef cpu_to_be16
+#undef cpu_to_be16
+#endif
+#define cpu_to_be16(x) __constant_htons(x)
+
+#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,1)))
+enum {
+ DUMP_PREFIX_NONE,
+ DUMP_PREFIX_ADDRESS,
+ DUMP_PREFIX_OFFSET
+};
+#endif /* !(RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,1)) */
+#ifndef hex_asc
+#define hex_asc(x) "0123456789abcdef"[x]
+#endif
+#include <linux/ctype.h>
+extern void _kc_print_hex_dump(const char *level, const char *prefix_str,
+ int prefix_type, int rowsize, int groupsize,
+ const void *buf, size_t len, bool ascii);
+#define print_hex_dump(lvl, s, t, r, g, b, l, a) \
+ _kc_print_hex_dump(lvl, s, t, r, g, b, l, a)
+#ifndef ADVERTISED_2500baseX_Full
+#define ADVERTISED_2500baseX_Full (1 << 15)
+#endif
+#ifndef SUPPORTED_2500baseX_Full
+#define SUPPORTED_2500baseX_Full (1 << 15)
+#endif
+
+#ifdef HAVE_I2C_SUPPORT
+#include <linux/i2c.h>
+#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,5)))
+struct i2c_board_info {
+ char driver_name[KOBJ_NAME_LEN];
+ char type[I2C_NAME_SIZE];
+ unsigned short flags;
+ unsigned short addr;
+ void *platform_data;
+};
+#define I2C_BOARD_INFO(driver, dev_addr) .driver_name = (driver),\
+ .addr = (dev_addr)
+#endif /* !(RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,5)) */
+#define i2c_new_device(adap, info) _kc_i2c_new_device(adap, info)
+extern struct i2c_client *
+_kc_i2c_new_device(struct i2c_adapter *adap, struct i2c_board_info const *info);
+#endif /* HAVE_I2C_SUPPORT */
+
+#else /* 2.6.22 */
+#define ETH_TYPE_TRANS_SETS_DEV
+#define HAVE_NETDEV_STATS_IN_NETDEV
+#endif /* < 2.6.22 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,6,22) )
+#undef SET_MODULE_OWNER
+#define SET_MODULE_OWNER(dev) do { } while (0)
+#endif /* > 2.6.22 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) )
+#define netif_subqueue_stopped(_a, _b) 0
+#ifndef PTR_ALIGN
+#define PTR_ALIGN(p, a) ((typeof(p))ALIGN((unsigned long)(p), (a)))
+#endif
+
+#ifndef CONFIG_PM_SLEEP
+#define CONFIG_PM_SLEEP CONFIG_PM
+#endif
+
+#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,6,13) )
+#define HAVE_ETHTOOL_GET_PERM_ADDR
+#endif /* 2.6.14 through 2.6.22 */
+#endif /* < 2.6.23 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) )
+#ifndef ETH_FLAG_LRO
+#define ETH_FLAG_LRO NETIF_F_LRO
+#endif
+
+/* if GRO is supported then the napi struct must already exist */
+#ifndef NETIF_F_GRO
+/* NAPI API changes in 2.6.24 break everything */
+struct napi_struct {
+ /* used to look up the real NAPI polling routine */
+ int (*poll)(struct napi_struct *, int);
+ struct net_device *dev;
+ int weight;
+};
+#endif
+
+#ifdef NAPI
+extern int __kc_adapter_clean(struct net_device *, int *);
+extern struct net_device *napi_to_poll_dev(const struct napi_struct *napi);
+#define netif_napi_add(_netdev, _napi, _poll, _weight) \
+ do { \
+ struct napi_struct *__napi = (_napi); \
+ struct net_device *poll_dev = napi_to_poll_dev(__napi); \
+ poll_dev->poll = &(__kc_adapter_clean); \
+ poll_dev->priv = (_napi); \
+ poll_dev->weight = (_weight); \
+ set_bit(__LINK_STATE_RX_SCHED, &poll_dev->state); \
+ set_bit(__LINK_STATE_START, &poll_dev->state);\
+ dev_hold(poll_dev); \
+ __napi->poll = &(_poll); \
+ __napi->weight = (_weight); \
+ __napi->dev = (_netdev); \
+ } while (0)
+#define netif_napi_del(_napi) \
+ do { \
+ struct net_device *poll_dev = napi_to_poll_dev(_napi); \
+ WARN_ON(!test_bit(__LINK_STATE_RX_SCHED, &poll_dev->state)); \
+ dev_put(poll_dev); \
+ memset(poll_dev, 0, sizeof(struct net_device));\
+ } while (0)
+#define napi_schedule_prep(_napi) \
+ (netif_running((_napi)->dev) && netif_rx_schedule_prep(napi_to_poll_dev(_napi)))
+#define napi_schedule(_napi) \
+ do { \
+ if (napi_schedule_prep(_napi)) \
+ __netif_rx_schedule(napi_to_poll_dev(_napi)); \
+ } while (0)
+#define napi_enable(_napi) netif_poll_enable(napi_to_poll_dev(_napi))
+#define napi_disable(_napi) netif_poll_disable(napi_to_poll_dev(_napi))
+#ifdef CONFIG_SMP
+static inline void napi_synchronize(const struct napi_struct *n)
+{
+ struct net_device *dev = napi_to_poll_dev(n);
+
+ while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
+ /* No hurry. */
+ msleep(1);
+ }
+}
+#else
+#define napi_synchronize(n) barrier()
+#endif /* CONFIG_SMP */
+#define __napi_schedule(_napi) __netif_rx_schedule(napi_to_poll_dev(_napi))
+#ifndef NETIF_F_GRO
+#define napi_complete(_napi) netif_rx_complete(napi_to_poll_dev(_napi))
+#else
+#define napi_complete(_napi) \
+ do { \
+ napi_gro_flush(_napi); \
+ netif_rx_complete(napi_to_poll_dev(_napi)); \
+ } while (0)
+#endif /* NETIF_F_GRO */
+#else /* NAPI */
+#define netif_napi_add(_netdev, _napi, _poll, _weight) \
+ do { \
+ struct napi_struct *__napi = _napi; \
+ _netdev->poll = &(_poll); \
+ _netdev->weight = (_weight); \
+ __napi->poll = &(_poll); \
+ __napi->weight = (_weight); \
+ __napi->dev = (_netdev); \
+ } while (0)
+#define netif_napi_del(_a) do {} while (0)
+#endif /* NAPI */
+
+#undef dev_get_by_name
+#define dev_get_by_name(_a, _b) dev_get_by_name(_b)
+#define __netif_subqueue_stopped(_a, _b) netif_subqueue_stopped(_a, _b)
+#ifndef DMA_BIT_MASK
+#define DMA_BIT_MASK(n) (((n) == 64) ? DMA_64BIT_MASK : ((1ULL<<(n))-1))
+#endif
+
+#ifdef NETIF_F_TSO6
+#define skb_is_gso_v6 _kc_skb_is_gso_v6
+static inline int _kc_skb_is_gso_v6(const struct sk_buff *skb)
+{
+ return skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6;
+}
+#endif /* NETIF_F_TSO6 */
+
+#ifndef KERN_CONT
+#define KERN_CONT ""
+#endif
+#ifndef pr_err
+#define pr_err(fmt, arg...) \
+ printk(KERN_ERR fmt, ##arg)
+#endif
+#else /* < 2.6.24 */
+#define HAVE_ETHTOOL_GET_SSET_COUNT
+#define HAVE_NETDEV_NAPI_LIST
+#endif /* < 2.6.24 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,6,24) )
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,2,0) )
+#include <linux/pm_qos_params.h>
+#else /* >= 3.2.0 */
+#include <linux/pm_qos.h>
+#endif /* else >= 3.2.0 */
+#endif /* > 2.6.24 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) )
+#define PM_QOS_CPU_DMA_LATENCY 1
+
+#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18) )
+#include <linux/latency.h>
+#define PM_QOS_DEFAULT_VALUE INFINITE_LATENCY
+#define pm_qos_add_requirement(pm_qos_class, name, value) \
+ set_acceptable_latency(name, value)
+#define pm_qos_remove_requirement(pm_qos_class, name) \
+ remove_acceptable_latency(name)
+#define pm_qos_update_requirement(pm_qos_class, name, value) \
+ modify_acceptable_latency(name, value)
+#else
+#define PM_QOS_DEFAULT_VALUE -1
+#define pm_qos_add_requirement(pm_qos_class, name, value)
+#define pm_qos_remove_requirement(pm_qos_class, name)
+#define pm_qos_update_requirement(pm_qos_class, name, value) { \
+ if (value != PM_QOS_DEFAULT_VALUE) { \
+ printk(KERN_WARNING "%s: unable to set PM QoS requirement\n", \
+ pci_name(adapter->pdev)); \
+ } \
+}
+
+#endif /* > 2.6.18 */
+
+#define pci_enable_device_mem(pdev) pci_enable_device(pdev)
+
+#ifndef DEFINE_PCI_DEVICE_TABLE
+#define DEFINE_PCI_DEVICE_TABLE(_table) struct pci_device_id _table[]
+#endif /* DEFINE_PCI_DEVICE_TABLE */
+
+
+#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) )
+#ifndef IGB_PROCFS
+#define IGB_PROCFS
+#endif /* IGB_PROCFS */
+#endif /* >= 2.6.0 */
+
+#else /* < 2.6.25 */
+
+
+#if IS_ENABLED(CONFIG_HWMON)
+#ifndef IGB_HWMON
+#define IGB_HWMON
+#endif /* IGB_HWMON */
+#endif /* CONFIG_HWMON */
+
+#endif /* < 2.6.25 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26) )
+#ifndef clamp_t
+#define clamp_t(type, val, min, max) ({ \
+ type __val = (val); \
+ type __min = (min); \
+ type __max = (max); \
+ __val = __val < __min ? __min : __val; \
+ __val > __max ? __max : __val; })
+#endif /* clamp_t */
+#undef kzalloc_node
+#define kzalloc_node(_size, _flags, _node) kzalloc(_size, _flags)
+
+extern void _kc_pci_disable_link_state(struct pci_dev *dev, int state);
+#define pci_disable_link_state(p, s) _kc_pci_disable_link_state(p, s)
+#else /* < 2.6.26 */
+#include <linux/pci-aspm.h>
+#define HAVE_NETDEV_VLAN_FEATURES
+#ifndef PCI_EXP_LNKCAP_ASPMS
+#define PCI_EXP_LNKCAP_ASPMS 0x00000c00 /* ASPM Support */
+#endif /* PCI_EXP_LNKCAP_ASPMS */
+#endif /* < 2.6.26 */
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27) )
+static inline void _kc_ethtool_cmd_speed_set(struct ethtool_cmd *ep,
+ __u32 speed)
+{
+ ep->speed = (__u16)speed;
+ /* ep->speed_hi = (__u16)(speed >> 16); */
+}
+#define ethtool_cmd_speed_set _kc_ethtool_cmd_speed_set
+
+static inline __u32 _kc_ethtool_cmd_speed(struct ethtool_cmd *ep)
+{
+ /* no speed_hi before 2.6.27, and probably no need for it yet */
+ return (__u32)ep->speed;
+}
+#define ethtool_cmd_speed _kc_ethtool_cmd_speed
+
+#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,15) )
+#if ((LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)) && defined(CONFIG_PM))
+#define ANCIENT_PM 1
+#elif ((LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23)) && \
+ (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26)) && \
+ defined(CONFIG_PM_SLEEP))
+#define NEWER_PM 1
+#endif
+#if defined(ANCIENT_PM) || defined(NEWER_PM)
+#undef device_set_wakeup_enable
+#define device_set_wakeup_enable(dev, val) \
+ do { \
+ u16 pmc = 0; \
+ int pm = pci_find_capability(adapter->pdev, PCI_CAP_ID_PM); \
+ if (pm) { \
+ pci_read_config_word(adapter->pdev, pm + PCI_PM_PMC, \
+ &pmc); \
+ } \
+ (dev)->power.can_wakeup = !!(pmc >> 11); \
+ (dev)->power.should_wakeup = (val && (pmc >> 11)); \
+ } while (0)
+#endif /* 2.6.15-2.6.22 and CONFIG_PM or 2.6.23-2.6.25 and CONFIG_PM_SLEEP */
+#endif /* 2.6.15 through 2.6.27 */
+#ifndef netif_napi_del
+#define netif_napi_del(_a) do {} while (0)
+#ifdef NAPI
+#ifdef CONFIG_NETPOLL
+#undef netif_napi_del
+#define netif_napi_del(_a) list_del(&(_a)->dev_list);
+#endif
+#endif
+#endif /* netif_napi_del */
+#ifdef dma_mapping_error
+#undef dma_mapping_error
+#endif
+#define dma_mapping_error(dev, dma_addr) pci_dma_mapping_error(dma_addr)
+
+#ifdef CONFIG_NETDEVICES_MULTIQUEUE
+#define HAVE_TX_MQ
+#endif
+
+#ifdef HAVE_TX_MQ
+extern void _kc_netif_tx_stop_all_queues(struct net_device *);
+extern void _kc_netif_tx_wake_all_queues(struct net_device *);
+extern void _kc_netif_tx_start_all_queues(struct net_device *);
+#define netif_tx_stop_all_queues(a) _kc_netif_tx_stop_all_queues(a)
+#define netif_tx_wake_all_queues(a) _kc_netif_tx_wake_all_queues(a)
+#define netif_tx_start_all_queues(a) _kc_netif_tx_start_all_queues(a)
+#undef netif_stop_subqueue
+#define netif_stop_subqueue(_ndev,_qi) do { \
+ if (netif_is_multiqueue((_ndev))) \
+ netif_stop_subqueue((_ndev), (_qi)); \
+ else \
+ netif_stop_queue((_ndev)); \
+ } while (0)
+#undef netif_start_subqueue
+#define netif_start_subqueue(_ndev,_qi) do { \
+ if (netif_is_multiqueue((_ndev))) \
+ netif_start_subqueue((_ndev), (_qi)); \
+ else \
+ netif_start_queue((_ndev)); \
+ } while (0)
+#else /* HAVE_TX_MQ */
+#define netif_tx_stop_all_queues(a) netif_stop_queue(a)
+#define netif_tx_wake_all_queues(a) netif_wake_queue(a)
+#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,12) )
+#define netif_tx_start_all_queues(a) netif_start_queue(a)
+#else
+#define netif_tx_start_all_queues(a) do {} while (0)
+#endif
+#define netif_stop_subqueue(_ndev,_qi) netif_stop_queue((_ndev))
+#define netif_start_subqueue(_ndev,_qi) netif_start_queue((_ndev))
+#endif /* HAVE_TX_MQ */
+#ifndef NETIF_F_MULTI_QUEUE
+#define NETIF_F_MULTI_QUEUE 0
+#define netif_is_multiqueue(a) 0
+#define netif_wake_subqueue(a, b)
+#endif /* NETIF_F_MULTI_QUEUE */
+
+#ifndef __WARN_printf
+extern void __kc_warn_slowpath(const char *file, const int line,
+ const char *fmt, ...) __attribute__((format(printf, 3, 4)));
+#define __WARN_printf(arg...) __kc_warn_slowpath(__FILE__, __LINE__, arg)
+#endif /* __WARN_printf */
+
+#ifndef WARN
+#define WARN(condition, format...) ({ \
+ int __ret_warn_on = !!(condition); \
+ if (unlikely(__ret_warn_on)) \
+ __WARN_printf(format); \
+ unlikely(__ret_warn_on); \
+})
+#endif /* WARN */
+#undef HAVE_IXGBE_DEBUG_FS
+#undef HAVE_IGB_DEBUG_FS
+#else /* < 2.6.27 */
+#define HAVE_TX_MQ
+#define HAVE_NETDEV_SELECT_QUEUE
+#ifdef CONFIG_DEBUG_FS
+#define HAVE_IXGBE_DEBUG_FS
+#define HAVE_IGB_DEBUG_FS
+#endif /* CONFIG_DEBUG_FS */
+#endif /* < 2.6.27 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) )
+#define pci_ioremap_bar(pdev, bar) ioremap(pci_resource_start(pdev, bar), \
+ pci_resource_len(pdev, bar))
+#define pci_wake_from_d3 _kc_pci_wake_from_d3
+#define pci_prepare_to_sleep _kc_pci_prepare_to_sleep
+extern int _kc_pci_wake_from_d3(struct pci_dev *dev, bool enable);
+extern int _kc_pci_prepare_to_sleep(struct pci_dev *dev);
+#define netdev_alloc_page(a) alloc_page(GFP_ATOMIC)
+#ifndef __skb_queue_head_init
+static inline void __kc_skb_queue_head_init(struct sk_buff_head *list)
+{
+ list->prev = list->next = (struct sk_buff *)list;
+ list->qlen = 0;
+}
+#define __skb_queue_head_init(_q) __kc_skb_queue_head_init(_q)
+#endif
+
+#define PCI_EXP_DEVCAP2 36 /* Device Capabilities 2 */
+#define PCI_EXP_DEVCTL2 40 /* Device Control 2 */
+
+#endif /* < 2.6.28 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,29) )
+#ifndef swap
+#define swap(a, b) \
+ do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
+#endif
+#define pci_request_selected_regions_exclusive(pdev, bars, name) \
+ pci_request_selected_regions(pdev, bars, name)
+#ifndef CONFIG_NR_CPUS
+#define CONFIG_NR_CPUS 1
+#endif /* CONFIG_NR_CPUS */
+#ifndef pcie_aspm_enabled
+#define pcie_aspm_enabled() (1)
+#endif /* pcie_aspm_enabled */
+
+#define PCI_EXP_SLTSTA_PDS 0x0040 /* Presence Detect State */
+
+#ifndef pci_clear_master
+extern void _kc_pci_clear_master(struct pci_dev *dev);
+#define pci_clear_master(dev) _kc_pci_clear_master(dev)
+#endif
+
+#ifndef PCI_EXP_LNKCTL_ASPMC
+#define PCI_EXP_LNKCTL_ASPMC 0x0003 /* ASPM Control */
+#endif
+#else /* < 2.6.29 */
+#ifndef HAVE_NET_DEVICE_OPS
+#define HAVE_NET_DEVICE_OPS
+#endif
+#ifdef CONFIG_DCB
+#define HAVE_PFC_MODE_ENABLE
+#endif /* CONFIG_DCB */
+#endif /* < 2.6.29 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,30) )
+#define skb_rx_queue_recorded(a) false
+#define skb_get_rx_queue(a) 0
+#define skb_record_rx_queue(a, b) do {} while (0)
+#define skb_tx_hash(n, s) ___kc_skb_tx_hash((n), (s), (n)->real_num_tx_queues)
+#ifndef CONFIG_PCI_IOV
+#undef pci_enable_sriov
+#define pci_enable_sriov(a, b) -ENOTSUPP
+#undef pci_disable_sriov
+#define pci_disable_sriov(a) do {} while (0)
+#endif /* CONFIG_PCI_IOV */
+#ifndef pr_cont
+#define pr_cont(fmt, ...) \
+ printk(KERN_CONT fmt, ##__VA_ARGS__)
+#endif /* pr_cont */
+static inline void _kc_synchronize_irq(unsigned int a)
+{
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,5,28) )
+ synchronize_irq();
+#else /* < 2.5.28 */
+ synchronize_irq(a);
+#endif /* < 2.5.28 */
+}
+#undef synchronize_irq
+#define synchronize_irq(a) _kc_synchronize_irq(a)
+
+#define PCI_EXP_LNKCTL2 48 /* Link Control 2 */
+
+#else /* < 2.6.30 */
+#define HAVE_ASPM_QUIRKS
+#endif /* < 2.6.30 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,31) )
+#define ETH_P_1588 0x88F7
+#define ETH_P_FIP 0x8914
+#ifndef netdev_uc_count
+#define netdev_uc_count(dev) ((dev)->uc_count)
+#endif
+#ifndef netdev_for_each_uc_addr
+#define netdev_for_each_uc_addr(uclist, dev) \
+ for (uclist = dev->uc_list; uclist; uclist = uclist->next)
+#endif
+#ifndef PORT_OTHER
+#define PORT_OTHER 0xff
+#endif
+#ifndef MDIO_PHY_ID_PRTAD
+#define MDIO_PHY_ID_PRTAD 0x03e0
+#endif
+#ifndef MDIO_PHY_ID_DEVAD
+#define MDIO_PHY_ID_DEVAD 0x001f
+#endif
+#ifndef skb_dst
+#define skb_dst(s) ((s)->dst)
+#endif
+
+#ifndef SUPPORTED_1000baseKX_Full
+#define SUPPORTED_1000baseKX_Full (1 << 17)
+#endif
+#ifndef SUPPORTED_10000baseKX4_Full
+#define SUPPORTED_10000baseKX4_Full (1 << 18)
+#endif
+#ifndef SUPPORTED_10000baseKR_Full
+#define SUPPORTED_10000baseKR_Full (1 << 19)
+#endif
+
+#ifndef ADVERTISED_1000baseKX_Full
+#define ADVERTISED_1000baseKX_Full (1 << 17)
+#endif
+#ifndef ADVERTISED_10000baseKX4_Full
+#define ADVERTISED_10000baseKX4_Full (1 << 18)
+#endif
+#ifndef ADVERTISED_10000baseKR_Full
+#define ADVERTISED_10000baseKR_Full (1 << 19)
+#endif
+
+#else /* < 2.6.31 */
+#ifndef HAVE_NETDEV_STORAGE_ADDRESS
+#define HAVE_NETDEV_STORAGE_ADDRESS
+#endif
+#ifndef HAVE_NETDEV_HW_ADDR
+#define HAVE_NETDEV_HW_ADDR
+#endif
+#ifndef HAVE_TRANS_START_IN_QUEUE
+#define HAVE_TRANS_START_IN_QUEUE
+#endif
+#ifndef HAVE_INCLUDE_LINUX_MDIO_H
+#define HAVE_INCLUDE_LINUX_MDIO_H
+#endif
+#endif /* < 2.6.31 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,32) )
+#undef netdev_tx_t
+#define netdev_tx_t int
+#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
+#ifndef NETIF_F_FCOE_MTU
+#define NETIF_F_FCOE_MTU (1 << 26)
+#endif
+#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */
+
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) )
+static inline int _kc_pm_runtime_get_sync()
+{
+ return 1;
+}
+#define pm_runtime_get_sync(dev) _kc_pm_runtime_get_sync()
+#else /* 2.6.0 => 2.6.32 */
+static inline int _kc_pm_runtime_get_sync(struct device *dev)
+{
+ return 1;
+}
+#ifndef pm_runtime_get_sync
+#define pm_runtime_get_sync(dev) _kc_pm_runtime_get_sync(dev)
+#endif
+#endif /* 2.6.0 => 2.6.32 */
+#ifndef pm_runtime_put
+#define pm_runtime_put(dev) do {} while (0)
+#endif
+#ifndef pm_runtime_put_sync
+#define pm_runtime_put_sync(dev) do {} while (0)
+#endif
+#ifndef pm_runtime_resume
+#define pm_runtime_resume(dev) do {} while (0)
+#endif
+#ifndef pm_schedule_suspend
+#define pm_schedule_suspend(dev, t) do {} while (0)
+#endif
+#ifndef pm_runtime_set_suspended
+#define pm_runtime_set_suspended(dev) do {} while (0)
+#endif
+#ifndef pm_runtime_disable
+#define pm_runtime_disable(dev) do {} while (0)
+#endif
+#ifndef pm_runtime_put_noidle
+#define pm_runtime_put_noidle(dev) do {} while (0)
+#endif
+#ifndef pm_runtime_set_active
+#define pm_runtime_set_active(dev) do {} while (0)
+#endif
+#ifndef pm_runtime_enable
+#define pm_runtime_enable(dev) do {} while (0)
+#endif
+#ifndef pm_runtime_get_noresume
+#define pm_runtime_get_noresume(dev) do {} while (0)
+#endif
+#else /* < 2.6.32 */
+#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
+#ifndef HAVE_NETDEV_OPS_FCOE_ENABLE
+#define HAVE_NETDEV_OPS_FCOE_ENABLE
+#endif
+#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */
+#ifdef CONFIG_DCB
+#ifndef HAVE_DCBNL_OPS_GETAPP
+#define HAVE_DCBNL_OPS_GETAPP
+#endif
+#endif /* CONFIG_DCB */
+#include <linux/pm_runtime.h>
+/* IOV bad DMA target work arounds require at least this kernel rev support */
+#define HAVE_PCIE_TYPE
+#endif /* < 2.6.32 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,33) )
+#ifndef pci_pcie_cap
+#define pci_pcie_cap(pdev) pci_find_capability(pdev, PCI_CAP_ID_EXP)
+#endif
+#ifndef IPV4_FLOW
+#define IPV4_FLOW 0x10
+#endif /* IPV4_FLOW */
+#ifndef IPV6_FLOW
+#define IPV6_FLOW 0x11
+#endif /* IPV6_FLOW */
+/* Features back-ported to RHEL6 or SLES11 SP1 after 2.6.32 */
+#if ( (RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,0)) || \
+ (SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,1,0)) )
+#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
+#ifndef HAVE_NETDEV_OPS_FCOE_GETWWN
+#define HAVE_NETDEV_OPS_FCOE_GETWWN
+#endif
+#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */
+#endif /* RHEL6 or SLES11 SP1 */
+#ifndef __percpu
+#define __percpu
+#endif /* __percpu */
+#ifndef PORT_DA
+#define PORT_DA PORT_OTHER
+#endif
+#ifndef PORT_NONE
+#define PORT_NONE PORT_OTHER
+#endif
+
+#if ((RHEL_RELEASE_CODE && \
+ (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,3)) && \
+ (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(7,0))))
+#if !defined(CONFIG_X86_32) && !defined(CONFIG_NEED_DMA_MAP_STATE)
+#undef DEFINE_DMA_UNMAP_ADDR
+#define DEFINE_DMA_UNMAP_ADDR(ADDR_NAME) dma_addr_t ADDR_NAME
+#undef DEFINE_DMA_UNMAP_LEN
+#define DEFINE_DMA_UNMAP_LEN(LEN_NAME) __u32 LEN_NAME
+#undef dma_unmap_addr
+#define dma_unmap_addr(PTR, ADDR_NAME) ((PTR)->ADDR_NAME)
+#undef dma_unmap_addr_set
+#define dma_unmap_addr_set(PTR, ADDR_NAME, VAL) (((PTR)->ADDR_NAME) = (VAL))
+#undef dma_unmap_len
+#define dma_unmap_len(PTR, LEN_NAME) ((PTR)->LEN_NAME)
+#undef dma_unmap_len_set
+#define dma_unmap_len_set(PTR, LEN_NAME, VAL) (((PTR)->LEN_NAME) = (VAL))
+#endif /* CONFIG_X86_64 && !CONFIG_NEED_DMA_MAP_STATE */
+#endif /* RHEL_RELEASE_CODE */
+
+#if (!(RHEL_RELEASE_CODE && \
+ (((RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,8)) && \
+ (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(6,0))) || \
+ ((RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,1)) && \
+ (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(7,0))))))
+static inline bool pci_is_pcie(struct pci_dev *dev)
+{
+ return !!pci_pcie_cap(dev);
+}
+#endif /* RHEL_RELEASE_CODE */
+
+#ifndef __always_unused
+#define __always_unused __attribute__((__unused__))
+#endif
+#ifndef __maybe_unused
+#define __maybe_unused __attribute__((__unused__))
+#endif
+
+#if (!(RHEL_RELEASE_CODE && \
+ (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,2))))
+#define sk_tx_queue_get(_sk) (-1)
+#define sk_tx_queue_set(_sk, _tx_queue) do {} while(0)
+#endif /* !(RHEL >= 6.2) */
+
+#if (RHEL_RELEASE_CODE && \
+ (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,4)) && \
+ (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(7,0)))
+#define HAVE_RHEL6_ETHTOOL_OPS_EXT_STRUCT
+#define HAVE_ETHTOOL_SET_PHYS_ID
+#define HAVE_ETHTOOL_GET_TS_INFO
+#endif /* RHEL >= 6.4 && RHEL < 7.0 */
+
+#if (RHEL_RELEASE_CODE && \
+ (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,5)) && \
+ (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(7,0)))
+#define HAVE_RHEL6_NETDEV_OPS_EXT_FDB
+#endif /* RHEL >= 6.5 && RHEL < 7.0 */
+
+#else /* < 2.6.33 */
+#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
+#ifndef HAVE_NETDEV_OPS_FCOE_GETWWN
+#define HAVE_NETDEV_OPS_FCOE_GETWWN
+#endif
+#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */
+#endif /* < 2.6.33 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,34) )
+#if (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(6,0))
+#ifndef pci_num_vf
+#define pci_num_vf(pdev) _kc_pci_num_vf(pdev)
+extern int _kc_pci_num_vf(struct pci_dev *dev);
+#endif
+#endif /* RHEL_RELEASE_CODE */
+
+#ifndef ETH_FLAG_NTUPLE
+#define ETH_FLAG_NTUPLE NETIF_F_NTUPLE
+#endif
+
+#ifndef netdev_mc_count
+#define netdev_mc_count(dev) ((dev)->mc_count)
+#endif
+#ifndef netdev_mc_empty
+#define netdev_mc_empty(dev) (netdev_mc_count(dev) == 0)
+#endif
+#ifndef netdev_for_each_mc_addr
+#define netdev_for_each_mc_addr(mclist, dev) \
+ for (mclist = dev->mc_list; mclist; mclist = mclist->next)
+#endif
+#ifndef netdev_uc_count
+#define netdev_uc_count(dev) ((dev)->uc.count)
+#endif
+#ifndef netdev_uc_empty
+#define netdev_uc_empty(dev) (netdev_uc_count(dev) == 0)
+#endif
+#ifndef netdev_for_each_uc_addr
+#define netdev_for_each_uc_addr(ha, dev) \
+ list_for_each_entry(ha, &dev->uc.list, list)
+#endif
+#ifndef dma_set_coherent_mask
+#define dma_set_coherent_mask(dev,mask) \
+ pci_set_consistent_dma_mask(to_pci_dev(dev),(mask))
+#endif
+#ifndef pci_dev_run_wake
+#define pci_dev_run_wake(pdev) (0)
+#endif
+
+/* netdev logging taken from include/linux/netdevice.h */
+#ifndef netdev_name
+static inline const char *_kc_netdev_name(const struct net_device *dev)
+{
+ if (dev->reg_state != NETREG_REGISTERED)
+ return "(unregistered net_device)";
+ return dev->name;
+}
+#define netdev_name(netdev) _kc_netdev_name(netdev)
+#endif /* netdev_name */
+
+#undef netdev_printk
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) )
+#define netdev_printk(level, netdev, format, args...) \
+do { \
+ struct pci_dev *pdev = _kc_netdev_to_pdev(netdev); \
+ printk(level "%s: " format, pci_name(pdev), ##args); \
+} while(0)
+#elif ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21) )
+#define netdev_printk(level, netdev, format, args...) \
+do { \
+ struct pci_dev *pdev = _kc_netdev_to_pdev(netdev); \
+ struct device *dev = pci_dev_to_dev(pdev); \
+ dev_printk(level, dev, "%s: " format, \
+ netdev_name(netdev), ##args); \
+} while(0)
+#else /* 2.6.21 => 2.6.34 */
+#define netdev_printk(level, netdev, format, args...) \
+ dev_printk(level, (netdev)->dev.parent, \
+ "%s: " format, \
+ netdev_name(netdev), ##args)
+#endif /* <2.6.0 <2.6.21 <2.6.34 */
+#undef netdev_emerg
+#define netdev_emerg(dev, format, args...) \
+ netdev_printk(KERN_EMERG, dev, format, ##args)
+#undef netdev_alert
+#define netdev_alert(dev, format, args...) \
+ netdev_printk(KERN_ALERT, dev, format, ##args)
+#undef netdev_crit
+#define netdev_crit(dev, format, args...) \
+ netdev_printk(KERN_CRIT, dev, format, ##args)
+#undef netdev_err
+#define netdev_err(dev, format, args...) \
+ netdev_printk(KERN_ERR, dev, format, ##args)
+#undef netdev_warn
+#define netdev_warn(dev, format, args...) \
+ netdev_printk(KERN_WARNING, dev, format, ##args)
+#undef netdev_notice
+#define netdev_notice(dev, format, args...) \
+ netdev_printk(KERN_NOTICE, dev, format, ##args)
+#undef netdev_info
+#define netdev_info(dev, format, args...) \
+ netdev_printk(KERN_INFO, dev, format, ##args)
+#undef netdev_dbg
+#if defined(DEBUG)
+#define netdev_dbg(__dev, format, args...) \
+ netdev_printk(KERN_DEBUG, __dev, format, ##args)
+#elif defined(CONFIG_DYNAMIC_DEBUG)
+#define netdev_dbg(__dev, format, args...) \
+do { \
+ dynamic_dev_dbg((__dev)->dev.parent, "%s: " format, \
+ netdev_name(__dev), ##args); \
+} while (0)
+#else /* DEBUG */
+#define netdev_dbg(__dev, format, args...) \
+({ \
+ if (0) \
+ netdev_printk(KERN_DEBUG, __dev, format, ##args); \
+ 0; \
+})
+#endif /* DEBUG */
+
+#undef netif_printk
+#define netif_printk(priv, type, level, dev, fmt, args...) \
+do { \
+ if (netif_msg_##type(priv)) \
+ netdev_printk(level, (dev), fmt, ##args); \
+} while (0)
+
+#undef netif_emerg
+#define netif_emerg(priv, type, dev, fmt, args...) \
+ netif_level(emerg, priv, type, dev, fmt, ##args)
+#undef netif_alert
+#define netif_alert(priv, type, dev, fmt, args...) \
+ netif_level(alert, priv, type, dev, fmt, ##args)
+#undef netif_crit
+#define netif_crit(priv, type, dev, fmt, args...) \
+ netif_level(crit, priv, type, dev, fmt, ##args)
+#undef netif_err
+#define netif_err(priv, type, dev, fmt, args...) \
+ netif_level(err, priv, type, dev, fmt, ##args)
+#undef netif_warn
+#define netif_warn(priv, type, dev, fmt, args...) \
+ netif_level(warn, priv, type, dev, fmt, ##args)
+#undef netif_notice
+#define netif_notice(priv, type, dev, fmt, args...) \
+ netif_level(notice, priv, type, dev, fmt, ##args)
+#undef netif_info
+#define netif_info(priv, type, dev, fmt, args...) \
+ netif_level(info, priv, type, dev, fmt, ##args)
+#undef netif_dbg
+#define netif_dbg(priv, type, dev, fmt, args...) \
+ netif_level(dbg, priv, type, dev, fmt, ##args)
+
+#ifdef SET_SYSTEM_SLEEP_PM_OPS
+#define HAVE_SYSTEM_SLEEP_PM_OPS
+#endif
+
+#ifndef for_each_set_bit
+#define for_each_set_bit(bit, addr, size) \
+ for ((bit) = find_first_bit((addr), (size)); \
+ (bit) < (size); \
+ (bit) = find_next_bit((addr), (size), (bit) + 1))
+#endif /* for_each_set_bit */
+
+#ifndef DEFINE_DMA_UNMAP_ADDR
+#define DEFINE_DMA_UNMAP_ADDR DECLARE_PCI_UNMAP_ADDR
+#define DEFINE_DMA_UNMAP_LEN DECLARE_PCI_UNMAP_LEN
+#define dma_unmap_addr pci_unmap_addr
+#define dma_unmap_addr_set pci_unmap_addr_set
+#define dma_unmap_len pci_unmap_len
+#define dma_unmap_len_set pci_unmap_len_set
+#endif /* DEFINE_DMA_UNMAP_ADDR */
+
+#if (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(6,3))
+#ifdef IGB_HWMON
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+#define sysfs_attr_init(attr) \
+ do { \
+ static struct lock_class_key __key; \
+ (attr)->key = &__key; \
+ } while (0)
+#else
+#define sysfs_attr_init(attr) do {} while (0)
+#endif /* CONFIG_DEBUG_LOCK_ALLOC */
+#endif /* IGB_HWMON */
+#endif /* RHEL_RELEASE_CODE */
+
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) )
+static inline bool _kc_pm_runtime_suspended()
+{
+ return false;
+}
+#define pm_runtime_suspended(dev) _kc_pm_runtime_suspended()
+#else /* 2.6.0 => 2.6.34 */
+static inline bool _kc_pm_runtime_suspended(struct device *dev)
+{
+ return false;
+}
+#ifndef pm_runtime_suspended
+#define pm_runtime_suspended(dev) _kc_pm_runtime_suspended(dev)
+#endif
+#endif /* 2.6.0 => 2.6.34 */
+
+#else /* < 2.6.34 */
+#define HAVE_SYSTEM_SLEEP_PM_OPS
+#ifndef HAVE_SET_RX_MODE
+#define HAVE_SET_RX_MODE
+#endif
+
+#endif /* < 2.6.34 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35) )
+
+ssize_t _kc_simple_write_to_buffer(void *to, size_t available, loff_t *ppos,
+ const void __user *from, size_t count);
+#define simple_write_to_buffer _kc_simple_write_to_buffer
+
+#ifndef numa_node_id
+#define numa_node_id() 0
+#endif
+#ifdef HAVE_TX_MQ
+#include <net/sch_generic.h>
+#ifndef CONFIG_NETDEVICES_MULTIQUEUE
+#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,0)))
+void _kc_netif_set_real_num_tx_queues(struct net_device *, unsigned int);
+#define netif_set_real_num_tx_queues _kc_netif_set_real_num_tx_queues
+#endif /* !(RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,0)) */
+#else /* CONFIG_NETDEVICES_MULTI_QUEUE */
+#define netif_set_real_num_tx_queues(_netdev, _count) \
+ do { \
+ (_netdev)->egress_subqueue_count = _count; \
+ } while (0)
+#endif /* CONFIG_NETDEVICES_MULTI_QUEUE */
+#else /* HAVE_TX_MQ */
+#define netif_set_real_num_tx_queues(_netdev, _count) do {} while(0)
+#endif /* HAVE_TX_MQ */
+#ifndef ETH_FLAG_RXHASH
+#define ETH_FLAG_RXHASH (1<<28)
+#endif /* ETH_FLAG_RXHASH */
+#if (RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,0))
+#define HAVE_IRQ_AFFINITY_HINT
+#endif
+#else /* < 2.6.35 */
+#define HAVE_PM_QOS_REQUEST_LIST
+#define HAVE_IRQ_AFFINITY_HINT
+#endif /* < 2.6.35 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36) )
+extern int _kc_ethtool_op_set_flags(struct net_device *, u32, u32);
+#define ethtool_op_set_flags _kc_ethtool_op_set_flags
+extern u32 _kc_ethtool_op_get_flags(struct net_device *);
+#define ethtool_op_get_flags _kc_ethtool_op_get_flags
+
+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+#ifdef NET_IP_ALIGN
+#undef NET_IP_ALIGN
+#endif
+#define NET_IP_ALIGN 0
+#endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
+
+#ifdef NET_SKB_PAD
+#undef NET_SKB_PAD
+#endif
+
+#if (L1_CACHE_BYTES > 32)
+#define NET_SKB_PAD L1_CACHE_BYTES
+#else
+#define NET_SKB_PAD 32
+#endif
+
+static inline struct sk_buff *_kc_netdev_alloc_skb_ip_align(struct net_device *dev,
+ unsigned int length)
+{
+ struct sk_buff *skb;
+
+ skb = alloc_skb(length + NET_SKB_PAD + NET_IP_ALIGN, GFP_ATOMIC);
+ if (skb) {
+#if (NET_IP_ALIGN + NET_SKB_PAD)
+ skb_reserve(skb, NET_IP_ALIGN + NET_SKB_PAD);
+#endif
+ skb->dev = dev;
+ }
+ return skb;
+}
+
+#ifdef netdev_alloc_skb_ip_align
+#undef netdev_alloc_skb_ip_align
+#endif
+#define netdev_alloc_skb_ip_align(n, l) _kc_netdev_alloc_skb_ip_align(n, l)
+
+#undef netif_level
+#define netif_level(level, priv, type, dev, fmt, args...) \
+do { \
+ if (netif_msg_##type(priv)) \
+ netdev_##level(dev, fmt, ##args); \
+} while (0)
+
+#undef usleep_range
+#define usleep_range(min, max) msleep(DIV_ROUND_UP(min, 1000))
+
+#define u64_stats_update_begin(a) do { } while(0)
+#define u64_stats_update_end(a) do { } while(0)
+#define u64_stats_fetch_begin(a) do { } while(0)
+#define u64_stats_fetch_retry_bh(a) (0)
+#define u64_stats_fetch_begin_bh(a) (0)
+
+#if (RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,1))
+#define HAVE_8021P_SUPPORT
+#endif
+
+#else /* < 2.6.36 */
+
+
+#define HAVE_PM_QOS_REQUEST_ACTIVE
+#define HAVE_8021P_SUPPORT
+#define HAVE_NDO_GET_STATS64
+#endif /* < 2.6.36 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37) )
+#ifndef netif_set_real_num_rx_queues
+static inline int __kc_netif_set_real_num_rx_queues(struct net_device *dev,
+ unsigned int rxq)
+{
+ return 0;
+}
+#define netif_set_real_num_rx_queues(dev, rxq) \
+ __kc_netif_set_real_num_rx_queues((dev), (rxq))
+#endif
+#ifndef ETHTOOL_RXNTUPLE_ACTION_CLEAR
+#define ETHTOOL_RXNTUPLE_ACTION_CLEAR (-2)
+#endif
+#ifndef VLAN_N_VID
+#define VLAN_N_VID VLAN_GROUP_ARRAY_LEN
+#endif /* VLAN_N_VID */
+#ifndef ETH_FLAG_TXVLAN
+#define ETH_FLAG_TXVLAN (1 << 7)
+#endif /* ETH_FLAG_TXVLAN */
+#ifndef ETH_FLAG_RXVLAN
+#define ETH_FLAG_RXVLAN (1 << 8)
+#endif /* ETH_FLAG_RXVLAN */
+
+static inline void _kc_skb_checksum_none_assert(struct sk_buff *skb)
+{
+ WARN_ON(skb->ip_summed != CHECKSUM_NONE);
+}
+#define skb_checksum_none_assert(skb) _kc_skb_checksum_none_assert(skb)
+
+static inline void *_kc_vzalloc_node(unsigned long size, int node)
+{
+ void *addr = vmalloc_node(size, node);
+ if (addr)
+ memset(addr, 0, size);
+ return addr;
+}
+#define vzalloc_node(_size, _node) _kc_vzalloc_node(_size, _node)
+
+static inline void *_kc_vzalloc(unsigned long size)
+{
+ void *addr = vmalloc(size);
+ if (addr)
+ memset(addr, 0, size);
+ return addr;
+}
+#define vzalloc(_size) _kc_vzalloc(_size)
+
+#ifndef vlan_get_protocol
+static inline __be16 __kc_vlan_get_protocol(const struct sk_buff *skb)
+{
+ if (vlan_tx_tag_present(skb) ||
+ skb->protocol != cpu_to_be16(ETH_P_8021Q))
+ return skb->protocol;
+
+ if (skb_headlen(skb) < sizeof(struct vlan_ethhdr))
+ return 0;
+
+ return ((struct vlan_ethhdr*)skb->data)->h_vlan_encapsulated_proto;
+}
+#define vlan_get_protocol(_skb) __kc_vlan_get_protocol(_skb)
+#endif
+#ifdef HAVE_HW_TIME_STAMP
+#define SKBTX_HW_TSTAMP (1 << 0)
+#define SKBTX_IN_PROGRESS (1 << 2)
+#define SKB_SHARED_TX_IS_UNION
+#endif
+
+#ifndef device_wakeup_enable
+#define device_wakeup_enable(dev) device_set_wakeup_enable(dev, true)
+#endif
+
+#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,4,18) )
+#ifndef HAVE_VLAN_RX_REGISTER
+#define HAVE_VLAN_RX_REGISTER
+#endif
+#endif /* > 2.4.18 */
+#endif /* < 2.6.37 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,38) )
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) )
+#define skb_checksum_start_offset(skb) skb_transport_offset(skb)
+#else /* 2.6.22 -> 2.6.37 */
+static inline int _kc_skb_checksum_start_offset(const struct sk_buff *skb)
+{
+ return skb->csum_start - skb_headroom(skb);
+}
+#define skb_checksum_start_offset(skb) _kc_skb_checksum_start_offset(skb)
+#endif /* 2.6.22 -> 2.6.37 */
+#ifdef CONFIG_DCB
+#ifndef IEEE_8021QAZ_MAX_TCS
+#define IEEE_8021QAZ_MAX_TCS 8
+#endif
+#ifndef DCB_CAP_DCBX_HOST
+#define DCB_CAP_DCBX_HOST 0x01
+#endif
+#ifndef DCB_CAP_DCBX_LLD_MANAGED
+#define DCB_CAP_DCBX_LLD_MANAGED 0x02
+#endif
+#ifndef DCB_CAP_DCBX_VER_CEE
+#define DCB_CAP_DCBX_VER_CEE 0x04
+#endif
+#ifndef DCB_CAP_DCBX_VER_IEEE
+#define DCB_CAP_DCBX_VER_IEEE 0x08
+#endif
+#ifndef DCB_CAP_DCBX_STATIC
+#define DCB_CAP_DCBX_STATIC 0x10
+#endif
+#endif /* CONFIG_DCB */
+#if (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,2))
+#define CONFIG_XPS
+#endif /* RHEL_RELEASE_VERSION(6,2) */
+#endif /* < 2.6.38 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39) )
+#ifndef NETIF_F_RXCSUM
+#define NETIF_F_RXCSUM (1 << 29)
+#endif
+#ifndef skb_queue_reverse_walk_safe
+#define skb_queue_reverse_walk_safe(queue, skb, tmp) \
+ for (skb = (queue)->prev, tmp = skb->prev; \
+ skb != (struct sk_buff *)(queue); \
+ skb = tmp, tmp = skb->prev)
+#endif
+#else /* < 2.6.39 */
+#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
+#ifndef HAVE_NETDEV_OPS_FCOE_DDP_TARGET
+#define HAVE_NETDEV_OPS_FCOE_DDP_TARGET
+#endif
+#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */
+#ifndef HAVE_MQPRIO
+#define HAVE_MQPRIO
+#endif
+#ifndef HAVE_SETUP_TC
+#define HAVE_SETUP_TC
+#endif
+#ifdef CONFIG_DCB
+#ifndef HAVE_DCBNL_IEEE
+#define HAVE_DCBNL_IEEE
+#endif
+#endif /* CONFIG_DCB */
+#ifndef HAVE_NDO_SET_FEATURES
+#define HAVE_NDO_SET_FEATURES
+#endif
+#endif /* < 2.6.39 */
+
+/*****************************************************************************/
+/* use < 2.6.40 because of a Fedora 15 kernel update where they
+ * updated the kernel version to 2.6.40.x and they back-ported 3.0 features
+ * like set_phys_id for ethtool.
+ */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,40) )
+#ifdef ETHTOOL_GRXRINGS
+#ifndef FLOW_EXT
+#define FLOW_EXT 0x80000000
+union _kc_ethtool_flow_union {
+ struct ethtool_tcpip4_spec tcp_ip4_spec;
+ struct ethtool_usrip4_spec usr_ip4_spec;
+ __u8 hdata[60];
+};
+struct _kc_ethtool_flow_ext {
+ __be16 vlan_etype;
+ __be16 vlan_tci;
+ __be32 data[2];
+};
+struct _kc_ethtool_rx_flow_spec {
+ __u32 flow_type;
+ union _kc_ethtool_flow_union h_u;
+ struct _kc_ethtool_flow_ext h_ext;
+ union _kc_ethtool_flow_union m_u;
+ struct _kc_ethtool_flow_ext m_ext;
+ __u64 ring_cookie;
+ __u32 location;
+};
+#define ethtool_rx_flow_spec _kc_ethtool_rx_flow_spec
+#endif /* FLOW_EXT */
+#endif
+
+#define pci_disable_link_state_locked pci_disable_link_state
+
+#ifndef PCI_LTR_VALUE_MASK
+#define PCI_LTR_VALUE_MASK 0x000003ff
+#endif
+#ifndef PCI_LTR_SCALE_MASK
+#define PCI_LTR_SCALE_MASK 0x00001c00
+#endif
+#ifndef PCI_LTR_SCALE_SHIFT
+#define PCI_LTR_SCALE_SHIFT 10
+#endif
+
+#else /* < 2.6.40 */
+#define HAVE_ETHTOOL_SET_PHYS_ID
+#endif /* < 2.6.40 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,0,0) )
+#define USE_LEGACY_PM_SUPPORT
+#endif /* < 3.0.0 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,1,0) )
+#ifndef __netdev_alloc_skb_ip_align
+#define __netdev_alloc_skb_ip_align(d,l,_g) netdev_alloc_skb_ip_align(d,l)
+#endif /* __netdev_alloc_skb_ip_align */
+#define dcb_ieee_setapp(dev, app) dcb_setapp(dev, app)
+#define dcb_ieee_delapp(dev, app) 0
+#define dcb_ieee_getapp_mask(dev, app) (1 << app->priority)
+
+/* 1000BASE-T Control register */
+#define CTL1000_AS_MASTER 0x0800
+#define CTL1000_ENABLE_MASTER 0x1000
+
+#else /* < 3.1.0 */
+#ifndef HAVE_DCBNL_IEEE_DELAPP
+#define HAVE_DCBNL_IEEE_DELAPP
+#endif
+#endif /* < 3.1.0 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,2,0) )
+#ifdef ETHTOOL_GRXRINGS
+#define HAVE_ETHTOOL_GET_RXNFC_VOID_RULE_LOCS
+#endif /* ETHTOOL_GRXRINGS */
+
+#ifndef skb_frag_size
+#define skb_frag_size(frag) _kc_skb_frag_size(frag)
+static inline unsigned int _kc_skb_frag_size(const skb_frag_t *frag)
+{
+ return frag->size;
+}
+#endif /* skb_frag_size */
+
+#ifndef skb_frag_size_sub
+#define skb_frag_size_sub(frag, delta) _kc_skb_frag_size_sub(frag, delta)
+static inline void _kc_skb_frag_size_sub(skb_frag_t *frag, int delta)
+{
+ frag->size -= delta;
+}
+#endif /* skb_frag_size_sub */
+
+#ifndef skb_frag_page
+#define skb_frag_page(frag) _kc_skb_frag_page(frag)
+static inline struct page *_kc_skb_frag_page(const skb_frag_t *frag)
+{
+ return frag->page;
+}
+#endif /* skb_frag_page */
+
+#ifndef skb_frag_address
+#define skb_frag_address(frag) _kc_skb_frag_address(frag)
+static inline void *_kc_skb_frag_address(const skb_frag_t *frag)
+{
+ return page_address(skb_frag_page(frag)) + frag->page_offset;
+}
+#endif /* skb_frag_address */
+
+#ifndef skb_frag_dma_map
+#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) )
+#include <linux/dma-mapping.h>
+#endif
+#define skb_frag_dma_map(dev,frag,offset,size,dir) \
+ _kc_skb_frag_dma_map(dev,frag,offset,size,dir)
+static inline dma_addr_t _kc_skb_frag_dma_map(struct device *dev,
+ const skb_frag_t *frag,
+ size_t offset, size_t size,
+ enum dma_data_direction dir)
+{
+ return dma_map_page(dev, skb_frag_page(frag),
+ frag->page_offset + offset, size, dir);
+}
+#endif /* skb_frag_dma_map */
+
+#ifndef __skb_frag_unref
+#define __skb_frag_unref(frag) __kc_skb_frag_unref(frag)
+static inline void __kc_skb_frag_unref(skb_frag_t *frag)
+{
+ put_page(skb_frag_page(frag));
+}
+#endif /* __skb_frag_unref */
+
+#ifndef SPEED_UNKNOWN
+#define SPEED_UNKNOWN -1
+#endif
+#ifndef DUPLEX_UNKNOWN
+#define DUPLEX_UNKNOWN 0xff
+#endif
+#if (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,3))
+#ifndef HAVE_PCI_DEV_FLAGS_ASSIGNED
+#define HAVE_PCI_DEV_FLAGS_ASSIGNED
+#endif
+#endif
+#else /* < 3.2.0 */
+#ifndef HAVE_PCI_DEV_FLAGS_ASSIGNED
+#define HAVE_PCI_DEV_FLAGS_ASSIGNED
+#define HAVE_VF_SPOOFCHK_CONFIGURE
+#endif
+#endif /* < 3.2.0 */
+
+#if (RHEL_RELEASE_CODE && RHEL_RELEASE_CODE == RHEL_RELEASE_VERSION(6,2))
+#undef ixgbe_get_netdev_tc_txq
+#define ixgbe_get_netdev_tc_txq(dev, tc) (&netdev_extended(dev)->qos_data.tc_to_txq[tc])
+#endif
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,3,0) )
+typedef u32 kni_netdev_features_t;
+#undef PCI_EXP_TYPE_RC_EC
+#define PCI_EXP_TYPE_RC_EC 0xa /* Root Complex Event Collector */
+#ifndef CONFIG_BQL
+#define netdev_tx_completed_queue(_q, _p, _b) do {} while (0)
+#define netdev_completed_queue(_n, _p, _b) do {} while (0)
+#define netdev_tx_sent_queue(_q, _b) do {} while (0)
+#define netdev_sent_queue(_n, _b) do {} while (0)
+#define netdev_tx_reset_queue(_q) do {} while (0)
+#define netdev_reset_queue(_n) do {} while (0)
+#endif
+#else /* ! < 3.3.0 */
+typedef netdev_features_t kni_netdev_features_t;
+#define HAVE_INT_NDO_VLAN_RX_ADD_VID
+#ifdef ETHTOOL_SRXNTUPLE
+#undef ETHTOOL_SRXNTUPLE
+#endif
+#endif /* < 3.3.0 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0) )
+#ifndef NETIF_F_RXFCS
+#define NETIF_F_RXFCS 0
+#endif /* NETIF_F_RXFCS */
+#ifndef NETIF_F_RXALL
+#define NETIF_F_RXALL 0
+#endif /* NETIF_F_RXALL */
+
+#if !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,3,0))
+#define NUMTCS_RETURNS_U8
+
+int _kc_simple_open(struct inode *inode, struct file *file);
+#define simple_open _kc_simple_open
+#endif /* !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,3,0)) */
+
+
+#ifndef skb_add_rx_frag
+#define skb_add_rx_frag _kc_skb_add_rx_frag
+extern void _kc_skb_add_rx_frag(struct sk_buff *, int, struct page *,
+ int, int, unsigned int);
+#endif
+#ifdef NET_ADDR_RANDOM
+#define eth_hw_addr_random(N) do { \
+ random_ether_addr(N->dev_addr); \
+ N->addr_assign_type |= NET_ADDR_RANDOM; \
+ } while (0)
+#else /* NET_ADDR_RANDOM */
+#define eth_hw_addr_random(N) random_ether_addr(N->dev_addr)
+#endif /* NET_ADDR_RANDOM */
+#else /* < 3.4.0 */
+#include <linux/kconfig.h>
+#endif /* >= 3.4.0 */
+
+/*****************************************************************************/
+#if defined(E1000E_PTP) || defined(IGB_PTP) || defined(IXGBE_PTP) || defined(I40E_PTP)
+#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(3,0,0) ) && IS_ENABLED(CONFIG_PTP_1588_CLOCK)
+#define HAVE_PTP_1588_CLOCK
+#else
+#error Cannot enable PTP Hardware Clock support due to a pre-3.0 kernel version or CONFIG_PTP_1588_CLOCK not enabled in the kernel
+#endif /* > 3.0.0 && IS_ENABLED(CONFIG_PTP_1588_CLOCK) */
+#endif /* E1000E_PTP || IGB_PTP || IXGBE_PTP || I40E_PTP */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0) )
+#define skb_tx_timestamp(skb) do {} while (0)
+static inline bool __kc_ether_addr_equal(const u8 *addr1, const u8 *addr2)
+{
+ return !compare_ether_addr(addr1, addr2);
+}
+#define ether_addr_equal(_addr1, _addr2) __kc_ether_addr_equal((_addr1),(_addr2))
+#else
+#define HAVE_FDB_OPS
+#define HAVE_ETHTOOL_GET_TS_INFO
+#endif /* < 3.5.0 */
+
+/*****************************************************************************/
+#include <linux/mdio.h>
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,6,0) )
+#define PCI_EXP_LNKCAP2 44 /* Link Capability 2 */
+
+#ifndef MDIO_EEE_100TX
+#define MDIO_EEE_100TX 0x0002 /* 100TX EEE cap */
+#endif
+#ifndef MDIO_EEE_1000T
+#define MDIO_EEE_1000T 0x0004 /* 1000T EEE cap */
+#endif
+#ifndef MDIO_EEE_10GT
+#define MDIO_EEE_10GT 0x0008 /* 10GT EEE cap */
+#endif
+#ifndef MDIO_EEE_1000KX
+#define MDIO_EEE_1000KX 0x0010 /* 1000KX EEE cap */
+#endif
+#ifndef MDIO_EEE_10GKX4
+#define MDIO_EEE_10GKX4 0x0020 /* 10G KX4 EEE cap */
+#endif
+#ifndef MDIO_EEE_10GKR
+#define MDIO_EEE_10GKR 0x0040 /* 10G KR EEE cap */
+#endif
+#endif /* < 3.6.0 */
+
+/******************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,7,0) )
+#ifndef ADVERTISED_40000baseKR4_Full
+/* these defines were all added in one commit, so should be safe
+ * to trigger activiation on one define
+ */
+#define SUPPORTED_40000baseKR4_Full (1 << 23)
+#define SUPPORTED_40000baseCR4_Full (1 << 24)
+#define SUPPORTED_40000baseSR4_Full (1 << 25)
+#define SUPPORTED_40000baseLR4_Full (1 << 26)
+#define ADVERTISED_40000baseKR4_Full (1 << 23)
+#define ADVERTISED_40000baseCR4_Full (1 << 24)
+#define ADVERTISED_40000baseSR4_Full (1 << 25)
+#define ADVERTISED_40000baseLR4_Full (1 << 26)
+#endif
+
+/**
+ * mmd_eee_cap_to_ethtool_sup_t
+ * @eee_cap: value of the MMD EEE Capability register
+ *
+ * A small helper function that translates MMD EEE Capability (3.20) bits
+ * to ethtool supported settings.
+ */
+static inline u32 __kc_mmd_eee_cap_to_ethtool_sup_t(u16 eee_cap)
+{
+ u32 supported = 0;
+
+ if (eee_cap & MDIO_EEE_100TX)
+ supported |= SUPPORTED_100baseT_Full;
+ if (eee_cap & MDIO_EEE_1000T)
+ supported |= SUPPORTED_1000baseT_Full;
+ if (eee_cap & MDIO_EEE_10GT)
+ supported |= SUPPORTED_10000baseT_Full;
+ if (eee_cap & MDIO_EEE_1000KX)
+ supported |= SUPPORTED_1000baseKX_Full;
+ if (eee_cap & MDIO_EEE_10GKX4)
+ supported |= SUPPORTED_10000baseKX4_Full;
+ if (eee_cap & MDIO_EEE_10GKR)
+ supported |= SUPPORTED_10000baseKR_Full;
+
+ return supported;
+}
+#define mmd_eee_cap_to_ethtool_sup_t(eee_cap) \
+ __kc_mmd_eee_cap_to_ethtool_sup_t(eee_cap)
+
+/**
+ * mmd_eee_adv_to_ethtool_adv_t
+ * @eee_adv: value of the MMD EEE Advertisement/Link Partner Ability registers
+ *
+ * A small helper function that translates the MMD EEE Advertisement (7.60)
+ * and MMD EEE Link Partner Ability (7.61) bits to ethtool advertisement
+ * settings.
+ */
+static inline u32 __kc_mmd_eee_adv_to_ethtool_adv_t(u16 eee_adv)
+{
+ u32 adv = 0;
+
+ if (eee_adv & MDIO_EEE_100TX)
+ adv |= ADVERTISED_100baseT_Full;
+ if (eee_adv & MDIO_EEE_1000T)
+ adv |= ADVERTISED_1000baseT_Full;
+ if (eee_adv & MDIO_EEE_10GT)
+ adv |= ADVERTISED_10000baseT_Full;
+ if (eee_adv & MDIO_EEE_1000KX)
+ adv |= ADVERTISED_1000baseKX_Full;
+ if (eee_adv & MDIO_EEE_10GKX4)
+ adv |= ADVERTISED_10000baseKX4_Full;
+ if (eee_adv & MDIO_EEE_10GKR)
+ adv |= ADVERTISED_10000baseKR_Full;
+
+ return adv;
+}
+#define mmd_eee_adv_to_ethtool_adv_t(eee_adv) \
+ __kc_mmd_eee_adv_to_ethtool_adv_t(eee_adv)
+
+/**
+ * ethtool_adv_to_mmd_eee_adv_t
+ * @adv: the ethtool advertisement settings
+ *
+ * A small helper function that translates ethtool advertisement settings
+ * to EEE advertisements for the MMD EEE Advertisement (7.60) and
+ * MMD EEE Link Partner Ability (7.61) registers.
+ */
+static inline u16 __kc_ethtool_adv_to_mmd_eee_adv_t(u32 adv)
+{
+ u16 reg = 0;
+
+ if (adv & ADVERTISED_100baseT_Full)
+ reg |= MDIO_EEE_100TX;
+ if (adv & ADVERTISED_1000baseT_Full)
+ reg |= MDIO_EEE_1000T;
+ if (adv & ADVERTISED_10000baseT_Full)
+ reg |= MDIO_EEE_10GT;
+ if (adv & ADVERTISED_1000baseKX_Full)
+ reg |= MDIO_EEE_1000KX;
+ if (adv & ADVERTISED_10000baseKX4_Full)
+ reg |= MDIO_EEE_10GKX4;
+ if (adv & ADVERTISED_10000baseKR_Full)
+ reg |= MDIO_EEE_10GKR;
+
+ return reg;
+}
+#define ethtool_adv_to_mmd_eee_adv_t(adv) \
+ __kc_ethtool_adv_to_mmd_eee_adv_t(adv)
+
+#ifndef pci_pcie_type
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) )
+static inline u8 pci_pcie_type(struct pci_dev *pdev)
+{
+ int pos;
+ u16 reg16;
+
+ pos = pci_find_capability(pdev, PCI_CAP_ID_EXP);
+ if (!pos)
+ BUG();
+ pci_read_config_word(pdev, pos + PCI_EXP_FLAGS, &reg16);
+ return (reg16 & PCI_EXP_FLAGS_TYPE) >> 4;
+}
+#else /* < 2.6.24 */
+#define pci_pcie_type(x) (x)->pcie_type
+#endif /* < 2.6.24 */
+#endif /* pci_pcie_type */
+
+#define ptp_clock_register(caps, args...) ptp_clock_register(caps)
+
+#ifndef PCI_EXP_LNKSTA2
+int __kc_pcie_capability_read_word(struct pci_dev *dev, int pos, u16 *val);
+#define pcie_capability_read_word(d,p,v) __kc_pcie_capability_read_word(d,p,v)
+int __kc_pcie_capability_write_word(struct pci_dev *dev, int pos, u16 val);
+#define pcie_capability_write_word(d,p,v) __kc_pcie_capability_write_word(d,p,v)
+int __kc_pcie_capability_clear_and_set_word(struct pci_dev *dev, int pos,
+ u16 clear, u16 set);
+#define pcie_capability_clear_and_set_word(d,p,c,s) \
+ __kc_pcie_capability_clear_and_set_word(d,p,c,s)
+
+#define PCI_EXP_LNKSTA2 50 /* Link Status 2 */
+
+static inline int pcie_capability_clear_word(struct pci_dev *dev, int pos,
+ u16 clear)
+{
+ return __kc_pcie_capability_clear_and_set_word(dev, pos, clear, 0);
+}
+#endif /* !PCI_EXP_LNKSTA2 */
+
+#if (SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,3,0))
+#define USE_CONST_DEV_UC_CHAR
+#endif
+
+#else /* >= 3.7.0 */
+#define HAVE_CONST_STRUCT_PCI_ERROR_HANDLERS
+#define USE_CONST_DEV_UC_CHAR
+#endif /* >= 3.7.0 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,8,0) )
+#ifndef PCI_EXP_LNKCTL_ASPM_L0S
+#define PCI_EXP_LNKCTL_ASPM_L0S 0x01 /* L0s Enable */
+#endif
+#ifndef PCI_EXP_LNKCTL_ASPM_L1
+#define PCI_EXP_LNKCTL_ASPM_L1 0x02 /* L1 Enable */
+#endif
+#define HAVE_CONFIG_HOTPLUG
+/* Reserved Ethernet Addresses per IEEE 802.1Q */
+static const u8 eth_reserved_addr_base[ETH_ALEN] __aligned(2) = {
+ 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 };
+#if !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,3,0)) &&\
+ !(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,5))
+static inline bool is_link_local_ether_addr(const u8 *addr)
+{
+ __be16 *a = (__be16 *)addr;
+ static const __be16 *b = (const __be16 *)eth_reserved_addr_base;
+ static const __be16 m = cpu_to_be16(0xfff0);
+
+ return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | ((a[2] ^ b[2]) & m)) == 0;
+}
+#endif /* !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,3,0)) */
+#else /* >= 3.8.0 */
+#ifndef __devinit
+#define __devinit
+#define HAVE_ENCAP_CSUM_OFFLOAD
+#endif
+
+#ifndef __devinitdata
+#define __devinitdata
+#endif
+
+#ifndef __devexit
+#define __devexit
+#endif
+
+#ifndef __devexit_p
+#define __devexit_p
+#endif
+
+#ifndef HAVE_SRIOV_CONFIGURE
+#define HAVE_SRIOV_CONFIGURE
+#endif
+
+#define HAVE_BRIDGE_ATTRIBS
+#ifndef BRIDGE_MODE_VEB
+#define BRIDGE_MODE_VEB 0 /* Default loopback mode */
+#endif /* BRIDGE_MODE_VEB */
+#ifndef BRIDGE_MODE_VEPA
+#define BRIDGE_MODE_VEPA 1 /* 802.1Qbg defined VEPA mode */
+#endif /* BRIDGE_MODE_VEPA */
+#endif /* >= 3.8.0 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,9,0) )
+
+#undef hlist_entry
+#define hlist_entry(ptr, type, member) container_of(ptr,type,member)
+
+#undef hlist_entry_safe
+#define hlist_entry_safe(ptr, type, member) \
+ (ptr) ? hlist_entry(ptr, type, member) : NULL
+
+#undef hlist_for_each_entry
+#define hlist_for_each_entry(pos, head, member) \
+ for (pos = hlist_entry_safe((head)->first, typeof(*(pos)), member); \
+ pos; \
+ pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member))
+
+#undef hlist_for_each_entry_safe
+#define hlist_for_each_entry_safe(pos, n, head, member) \
+ for (pos = hlist_entry_safe((head)->first, typeof(*pos), member); \
+ pos && ({ n = pos->member.next; 1; }); \
+ pos = hlist_entry_safe(n, typeof(*pos), member))
+
+#ifdef CONFIG_XPS
+extern int __kc_netif_set_xps_queue(struct net_device *, struct cpumask *, u16);
+#define netif_set_xps_queue(_dev, _mask, _idx) __kc_netif_set_xps_queue((_dev), (_mask), (_idx))
+#else /* CONFIG_XPS */
+#define netif_set_xps_queue(_dev, _mask, _idx) do {} while (0)
+#endif /* CONFIG_XPS */
+
+#ifdef HAVE_NETDEV_SELECT_QUEUE
+#define _kc_hashrnd 0xd631614b /* not so random hash salt */
+extern u16 __kc_netdev_pick_tx(struct net_device *dev, struct sk_buff *skb);
+#define __netdev_pick_tx __kc_netdev_pick_tx
+#endif /* HAVE_NETDEV_SELECT_QUEUE */
+#else
+#define HAVE_BRIDGE_FILTER
+#define USE_DEFAULT_FDB_DEL_DUMP
+#endif /* < 3.9.0 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) )
+#ifdef CONFIG_PCI_IOV
+extern int __kc_pci_vfs_assigned(struct pci_dev *dev);
+#else
+static inline int __kc_pci_vfs_assigned(struct pci_dev *dev)
+{
+ return 0;
+}
+#endif
+#define pci_vfs_assigned(dev) __kc_pci_vfs_assigned(dev)
+
+#ifndef VLAN_TX_COOKIE_MAGIC
+static inline struct sk_buff *__kc__vlan_hwaccel_put_tag(struct sk_buff *skb,
+ u16 vlan_tci)
+{
+#ifdef VLAN_TAG_PRESENT
+ vlan_tci |= VLAN_TAG_PRESENT;
+#endif
+ skb->vlan_tci = vlan_tci;
+ return skb;
+}
+#define __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci) \
+ __kc__vlan_hwaccel_put_tag(skb, vlan_tci)
+#endif
+
+#else /* >= 3.10.0 */
+#define HAVE_ENCAP_TSO_OFFLOAD
+#endif /* >= 3.10.0 */
+
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,14,0) )
+#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,6)))
+#if (!(UBUNTU_KERNEL_CODE >= UBUNTU_KERNEL_VERSION(3,13,0,30,0) \
+ && (UBUNTU_RELEASE_CODE == UBUNTU_RELEASE_VERSION(12,4) \
+ || UBUNTU_RELEASE_CODE == UBUNTU_RELEASE_VERSION(14,4))))
+#if (!(SLE_VERSION_CODE == SLE_VERSION(12,0,0)))
+#ifdef NETIF_F_RXHASH
+#define PKT_HASH_TYPE_L3 0
+static inline void
+skb_set_hash(struct sk_buff *skb, __u32 hash, __always_unused int type)
+{
+ skb->rxhash = hash;
+}
+#endif /* NETIF_F_RXHASH */
+#endif /* < SLES12 */
+#endif /* < 3.13.0-30.54 (Ubuntu 14.04) */
+#endif /* < RHEL7 */
+#endif /* < 3.14.0 */
+
+#if (( LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0) ) \
+ || ( RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,2) ))
+#undef SET_ETHTOOL_OPS
+#define SET_ETHTOOL_OPS(netdev, ops) ((netdev)->ethtool_ops = (ops))
+#define HAVE_VF_MIN_MAX_TXRATE 1
+#endif /* >= 3.16.0 */
+
+#if (( LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0) ) \
+ || ( RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,2) ))
+#define HAVE_NDO_DFLT_BRIDGE_ADD_MASK
+#if (!( RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,2) ))
+#define HAVE_NDO_FDB_ADD_VID
+#endif /* !RHEL 7.2 */
+#endif /* >= 3.19.0 */
+
+#if (( LINUX_VERSION_CODE >= KERNEL_VERSION(4,0,0) ) \
+ || ( RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,2) ))
+/* vlan_tx_xx functions got renamed to skb_vlan */
+#define vlan_tx_tag_get skb_vlan_tag_get
+#define vlan_tx_tag_present skb_vlan_tag_present
+#if (!( RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,2) ))
+#define HAVE_NDO_BRIDGE_SET_DEL_LINK_FLAGS
+#endif /* !RHEL 7.2 */
+#endif /* 4.0.0 */
+
+#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(4,1,0) )
+/* ndo_bridge_getlink adds new nlflags parameter */
+#define HAVE_NDO_BRIDGE_GETLINK_NLFLAGS
+#endif /* >= 4.1.0 */
+
+#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(4,2,0) )
+/* ndo_bridge_getlink adds new filter_mask and vlan_fill parameters */
+#define HAVE_NDO_BRIDGE_GETLINK_FILTER_MASK_VLAN_FILL
+#endif /* >= 4.2.0 */
+#endif /* _KCOMPAT_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat_ethtool.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat_ethtool.c
new file mode 100644
index 00000000..e1a89388
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat_ethtool.c
@@ -0,0 +1,1171 @@
+/*******************************************************************************
+
+ Intel(R) Gigabit Ethernet Linux driver
+ Copyright(c) 2007-2013 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+/*
+ * net/core/ethtool.c - Ethtool ioctl handler
+ * Copyright (c) 2003 Matthew Wilcox <matthew@wil.cx>
+ *
+ * This file is where we call all the ethtool_ops commands to get
+ * the information ethtool needs. We fall back to calling do_ioctl()
+ * for drivers which haven't been converted to ethtool_ops yet.
+ *
+ * It's GPL, stupid.
+ *
+ * Modification by sfeldma@pobox.com to work as backward compat
+ * solution for pre-ethtool_ops kernels.
+ * - copied struct ethtool_ops from ethtool.h
+ * - defined SET_ETHTOOL_OPS
+ * - put in some #ifndef NETIF_F_xxx wrappers
+ * - changes refs to dev->ethtool_ops to ethtool_ops
+ * - changed dev_ethtool to ethtool_ioctl
+ * - remove EXPORT_SYMBOL()s
+ * - added _kc_ prefix in built-in ethtool_op_xxx ops.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/mii.h>
+#include <linux/ethtool.h>
+#include <linux/netdevice.h>
+#include <asm/uaccess.h>
+
+#include "kcompat.h"
+
+#undef SUPPORTED_10000baseT_Full
+#define SUPPORTED_10000baseT_Full (1 << 12)
+#undef ADVERTISED_10000baseT_Full
+#define ADVERTISED_10000baseT_Full (1 << 12)
+#undef SPEED_10000
+#define SPEED_10000 10000
+
+#undef ethtool_ops
+#define ethtool_ops _kc_ethtool_ops
+
+struct _kc_ethtool_ops {
+ int (*get_settings)(struct net_device *, struct ethtool_cmd *);
+ int (*set_settings)(struct net_device *, struct ethtool_cmd *);
+ void (*get_drvinfo)(struct net_device *, struct ethtool_drvinfo *);
+ int (*get_regs_len)(struct net_device *);
+ void (*get_regs)(struct net_device *, struct ethtool_regs *, void *);
+ void (*get_wol)(struct net_device *, struct ethtool_wolinfo *);
+ int (*set_wol)(struct net_device *, struct ethtool_wolinfo *);
+ u32 (*get_msglevel)(struct net_device *);
+ void (*set_msglevel)(struct net_device *, u32);
+ int (*nway_reset)(struct net_device *);
+ u32 (*get_link)(struct net_device *);
+ int (*get_eeprom_len)(struct net_device *);
+ int (*get_eeprom)(struct net_device *, struct ethtool_eeprom *, u8 *);
+ int (*set_eeprom)(struct net_device *, struct ethtool_eeprom *, u8 *);
+ int (*get_coalesce)(struct net_device *, struct ethtool_coalesce *);
+ int (*set_coalesce)(struct net_device *, struct ethtool_coalesce *);
+ void (*get_ringparam)(struct net_device *, struct ethtool_ringparam *);
+ int (*set_ringparam)(struct net_device *, struct ethtool_ringparam *);
+ void (*get_pauseparam)(struct net_device *,
+ struct ethtool_pauseparam*);
+ int (*set_pauseparam)(struct net_device *,
+ struct ethtool_pauseparam*);
+ u32 (*get_rx_csum)(struct net_device *);
+ int (*set_rx_csum)(struct net_device *, u32);
+ u32 (*get_tx_csum)(struct net_device *);
+ int (*set_tx_csum)(struct net_device *, u32);
+ u32 (*get_sg)(struct net_device *);
+ int (*set_sg)(struct net_device *, u32);
+ u32 (*get_tso)(struct net_device *);
+ int (*set_tso)(struct net_device *, u32);
+ int (*self_test_count)(struct net_device *);
+ void (*self_test)(struct net_device *, struct ethtool_test *, u64 *);
+ void (*get_strings)(struct net_device *, u32 stringset, u8 *);
+ int (*phys_id)(struct net_device *, u32);
+ int (*get_stats_count)(struct net_device *);
+ void (*get_ethtool_stats)(struct net_device *, struct ethtool_stats *,
+ u64 *);
+} *ethtool_ops = NULL;
+
+#undef SET_ETHTOOL_OPS
+#define SET_ETHTOOL_OPS(netdev, ops) (ethtool_ops = (ops))
+
+/*
+ * Some useful ethtool_ops methods that are device independent. If we find that
+ * all drivers want to do the same thing here, we can turn these into dev_()
+ * function calls.
+ */
+
+#undef ethtool_op_get_link
+#define ethtool_op_get_link _kc_ethtool_op_get_link
+u32 _kc_ethtool_op_get_link(struct net_device *dev)
+{
+ return netif_carrier_ok(dev) ? 1 : 0;
+}
+
+#undef ethtool_op_get_tx_csum
+#define ethtool_op_get_tx_csum _kc_ethtool_op_get_tx_csum
+u32 _kc_ethtool_op_get_tx_csum(struct net_device *dev)
+{
+#ifdef NETIF_F_IP_CSUM
+ return (dev->features & NETIF_F_IP_CSUM) != 0;
+#else
+ return 0;
+#endif
+}
+
+#undef ethtool_op_set_tx_csum
+#define ethtool_op_set_tx_csum _kc_ethtool_op_set_tx_csum
+int _kc_ethtool_op_set_tx_csum(struct net_device *dev, u32 data)
+{
+#ifdef NETIF_F_IP_CSUM
+ if (data)
+#ifdef NETIF_F_IPV6_CSUM
+ dev->features |= (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
+ else
+ dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
+#else
+ dev->features |= NETIF_F_IP_CSUM;
+ else
+ dev->features &= ~NETIF_F_IP_CSUM;
+#endif
+#endif
+
+ return 0;
+}
+
+#undef ethtool_op_get_sg
+#define ethtool_op_get_sg _kc_ethtool_op_get_sg
+u32 _kc_ethtool_op_get_sg(struct net_device *dev)
+{
+#ifdef NETIF_F_SG
+ return (dev->features & NETIF_F_SG) != 0;
+#else
+ return 0;
+#endif
+}
+
+#undef ethtool_op_set_sg
+#define ethtool_op_set_sg _kc_ethtool_op_set_sg
+int _kc_ethtool_op_set_sg(struct net_device *dev, u32 data)
+{
+#ifdef NETIF_F_SG
+ if (data)
+ dev->features |= NETIF_F_SG;
+ else
+ dev->features &= ~NETIF_F_SG;
+#endif
+
+ return 0;
+}
+
+#undef ethtool_op_get_tso
+#define ethtool_op_get_tso _kc_ethtool_op_get_tso
+u32 _kc_ethtool_op_get_tso(struct net_device *dev)
+{
+#ifdef NETIF_F_TSO
+ return (dev->features & NETIF_F_TSO) != 0;
+#else
+ return 0;
+#endif
+}
+
+#undef ethtool_op_set_tso
+#define ethtool_op_set_tso _kc_ethtool_op_set_tso
+int _kc_ethtool_op_set_tso(struct net_device *dev, u32 data)
+{
+#ifdef NETIF_F_TSO
+ if (data)
+ dev->features |= NETIF_F_TSO;
+ else
+ dev->features &= ~NETIF_F_TSO;
+#endif
+
+ return 0;
+}
+
+/* Handlers for each ethtool command */
+
+static int ethtool_get_settings(struct net_device *dev, void *useraddr)
+{
+ struct ethtool_cmd cmd = { ETHTOOL_GSET };
+ int err;
+
+ if (!ethtool_ops->get_settings)
+ return -EOPNOTSUPP;
+
+ err = ethtool_ops->get_settings(dev, &cmd);
+ if (err < 0)
+ return err;
+
+ if (copy_to_user(useraddr, &cmd, sizeof(cmd)))
+ return -EFAULT;
+ return 0;
+}
+
+static int ethtool_set_settings(struct net_device *dev, void *useraddr)
+{
+ struct ethtool_cmd cmd;
+
+ if (!ethtool_ops->set_settings)
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
+ return -EFAULT;
+
+ return ethtool_ops->set_settings(dev, &cmd);
+}
+
+static int ethtool_get_drvinfo(struct net_device *dev, void *useraddr)
+{
+ struct ethtool_drvinfo info;
+ struct ethtool_ops *ops = ethtool_ops;
+
+ if (!ops->get_drvinfo)
+ return -EOPNOTSUPP;
+
+ memset(&info, 0, sizeof(info));
+ info.cmd = ETHTOOL_GDRVINFO;
+ ops->get_drvinfo(dev, &info);
+
+ if (ops->self_test_count)
+ info.testinfo_len = ops->self_test_count(dev);
+ if (ops->get_stats_count)
+ info.n_stats = ops->get_stats_count(dev);
+ if (ops->get_regs_len)
+ info.regdump_len = ops->get_regs_len(dev);
+ if (ops->get_eeprom_len)
+ info.eedump_len = ops->get_eeprom_len(dev);
+
+ if (copy_to_user(useraddr, &info, sizeof(info)))
+ return -EFAULT;
+ return 0;
+}
+
+static int ethtool_get_regs(struct net_device *dev, char *useraddr)
+{
+ struct ethtool_regs regs;
+ struct ethtool_ops *ops = ethtool_ops;
+ void *regbuf;
+ int reglen, ret;
+
+ if (!ops->get_regs || !ops->get_regs_len)
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&regs, useraddr, sizeof(regs)))
+ return -EFAULT;
+
+ reglen = ops->get_regs_len(dev);
+ if (regs.len > reglen)
+ regs.len = reglen;
+
+ regbuf = kmalloc(reglen, GFP_USER);
+ if (!regbuf)
+ return -ENOMEM;
+
+ ops->get_regs(dev, &regs, regbuf);
+
+ ret = -EFAULT;
+ if (copy_to_user(useraddr, &regs, sizeof(regs)))
+ goto out;
+ useraddr += offsetof(struct ethtool_regs, data);
+ if (copy_to_user(useraddr, regbuf, reglen))
+ goto out;
+ ret = 0;
+
+out:
+ kfree(regbuf);
+ return ret;
+}
+
+static int ethtool_get_wol(struct net_device *dev, char *useraddr)
+{
+ struct ethtool_wolinfo wol = { ETHTOOL_GWOL };
+
+ if (!ethtool_ops->get_wol)
+ return -EOPNOTSUPP;
+
+ ethtool_ops->get_wol(dev, &wol);
+
+ if (copy_to_user(useraddr, &wol, sizeof(wol)))
+ return -EFAULT;
+ return 0;
+}
+
+static int ethtool_set_wol(struct net_device *dev, char *useraddr)
+{
+ struct ethtool_wolinfo wol;
+
+ if (!ethtool_ops->set_wol)
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&wol, useraddr, sizeof(wol)))
+ return -EFAULT;
+
+ return ethtool_ops->set_wol(dev, &wol);
+}
+
+static int ethtool_get_msglevel(struct net_device *dev, char *useraddr)
+{
+ struct ethtool_value edata = { ETHTOOL_GMSGLVL };
+
+ if (!ethtool_ops->get_msglevel)
+ return -EOPNOTSUPP;
+
+ edata.data = ethtool_ops->get_msglevel(dev);
+
+ if (copy_to_user(useraddr, &edata, sizeof(edata)))
+ return -EFAULT;
+ return 0;
+}
+
+static int ethtool_set_msglevel(struct net_device *dev, char *useraddr)
+{
+ struct ethtool_value edata;
+
+ if (!ethtool_ops->set_msglevel)
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&edata, useraddr, sizeof(edata)))
+ return -EFAULT;
+
+ ethtool_ops->set_msglevel(dev, edata.data);
+ return 0;
+}
+
+static int ethtool_nway_reset(struct net_device *dev)
+{
+ if (!ethtool_ops->nway_reset)
+ return -EOPNOTSUPP;
+
+ return ethtool_ops->nway_reset(dev);
+}
+
+static int ethtool_get_link(struct net_device *dev, void *useraddr)
+{
+ struct ethtool_value edata = { ETHTOOL_GLINK };
+
+ if (!ethtool_ops->get_link)
+ return -EOPNOTSUPP;
+
+ edata.data = ethtool_ops->get_link(dev);
+
+ if (copy_to_user(useraddr, &edata, sizeof(edata)))
+ return -EFAULT;
+ return 0;
+}
+
+static int ethtool_get_eeprom(struct net_device *dev, void *useraddr)
+{
+ struct ethtool_eeprom eeprom;
+ struct ethtool_ops *ops = ethtool_ops;
+ u8 *data;
+ int ret;
+
+ if (!ops->get_eeprom || !ops->get_eeprom_len)
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&eeprom, useraddr, sizeof(eeprom)))
+ return -EFAULT;
+
+ /* Check for wrap and zero */
+ if (eeprom.offset + eeprom.len <= eeprom.offset)
+ return -EINVAL;
+
+ /* Check for exceeding total eeprom len */
+ if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev))
+ return -EINVAL;
+
+ data = kmalloc(eeprom.len, GFP_USER);
+ if (!data)
+ return -ENOMEM;
+
+ ret = -EFAULT;
+ if (copy_from_user(data, useraddr + sizeof(eeprom), eeprom.len))
+ goto out;
+
+ ret = ops->get_eeprom(dev, &eeprom, data);
+ if (ret)
+ goto out;
+
+ ret = -EFAULT;
+ if (copy_to_user(useraddr, &eeprom, sizeof(eeprom)))
+ goto out;
+ if (copy_to_user(useraddr + sizeof(eeprom), data, eeprom.len))
+ goto out;
+ ret = 0;
+
+out:
+ kfree(data);
+ return ret;
+}
+
+static int ethtool_set_eeprom(struct net_device *dev, void *useraddr)
+{
+ struct ethtool_eeprom eeprom;
+ struct ethtool_ops *ops = ethtool_ops;
+ u8 *data;
+ int ret;
+
+ if (!ops->set_eeprom || !ops->get_eeprom_len)
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&eeprom, useraddr, sizeof(eeprom)))
+ return -EFAULT;
+
+ /* Check for wrap and zero */
+ if (eeprom.offset + eeprom.len <= eeprom.offset)
+ return -EINVAL;
+
+ /* Check for exceeding total eeprom len */
+ if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev))
+ return -EINVAL;
+
+ data = kmalloc(eeprom.len, GFP_USER);
+ if (!data)
+ return -ENOMEM;
+
+ ret = -EFAULT;
+ if (copy_from_user(data, useraddr + sizeof(eeprom), eeprom.len))
+ goto out;
+
+ ret = ops->set_eeprom(dev, &eeprom, data);
+ if (ret)
+ goto out;
+
+ if (copy_to_user(useraddr + sizeof(eeprom), data, eeprom.len))
+ ret = -EFAULT;
+
+out:
+ kfree(data);
+ return ret;
+}
+
+static int ethtool_get_coalesce(struct net_device *dev, void *useraddr)
+{
+ struct ethtool_coalesce coalesce = { ETHTOOL_GCOALESCE };
+
+ if (!ethtool_ops->get_coalesce)
+ return -EOPNOTSUPP;
+
+ ethtool_ops->get_coalesce(dev, &coalesce);
+
+ if (copy_to_user(useraddr, &coalesce, sizeof(coalesce)))
+ return -EFAULT;
+ return 0;
+}
+
+static int ethtool_set_coalesce(struct net_device *dev, void *useraddr)
+{
+ struct ethtool_coalesce coalesce;
+
+ if (!ethtool_ops->get_coalesce)
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&coalesce, useraddr, sizeof(coalesce)))
+ return -EFAULT;
+
+ return ethtool_ops->set_coalesce(dev, &coalesce);
+}
+
+static int ethtool_get_ringparam(struct net_device *dev, void *useraddr)
+{
+ struct ethtool_ringparam ringparam = { ETHTOOL_GRINGPARAM };
+
+ if (!ethtool_ops->get_ringparam)
+ return -EOPNOTSUPP;
+
+ ethtool_ops->get_ringparam(dev, &ringparam);
+
+ if (copy_to_user(useraddr, &ringparam, sizeof(ringparam)))
+ return -EFAULT;
+ return 0;
+}
+
+static int ethtool_set_ringparam(struct net_device *dev, void *useraddr)
+{
+ struct ethtool_ringparam ringparam;
+
+ if (!ethtool_ops->get_ringparam)
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&ringparam, useraddr, sizeof(ringparam)))
+ return -EFAULT;
+
+ return ethtool_ops->set_ringparam(dev, &ringparam);
+}
+
+static int ethtool_get_pauseparam(struct net_device *dev, void *useraddr)
+{
+ struct ethtool_pauseparam pauseparam = { ETHTOOL_GPAUSEPARAM };
+
+ if (!ethtool_ops->get_pauseparam)
+ return -EOPNOTSUPP;
+
+ ethtool_ops->get_pauseparam(dev, &pauseparam);
+
+ if (copy_to_user(useraddr, &pauseparam, sizeof(pauseparam)))
+ return -EFAULT;
+ return 0;
+}
+
+static int ethtool_set_pauseparam(struct net_device *dev, void *useraddr)
+{
+ struct ethtool_pauseparam pauseparam;
+
+ if (!ethtool_ops->get_pauseparam)
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&pauseparam, useraddr, sizeof(pauseparam)))
+ return -EFAULT;
+
+ return ethtool_ops->set_pauseparam(dev, &pauseparam);
+}
+
+static int ethtool_get_rx_csum(struct net_device *dev, char *useraddr)
+{
+ struct ethtool_value edata = { ETHTOOL_GRXCSUM };
+
+ if (!ethtool_ops->get_rx_csum)
+ return -EOPNOTSUPP;
+
+ edata.data = ethtool_ops->get_rx_csum(dev);
+
+ if (copy_to_user(useraddr, &edata, sizeof(edata)))
+ return -EFAULT;
+ return 0;
+}
+
+static int ethtool_set_rx_csum(struct net_device *dev, char *useraddr)
+{
+ struct ethtool_value edata;
+
+ if (!ethtool_ops->set_rx_csum)
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&edata, useraddr, sizeof(edata)))
+ return -EFAULT;
+
+ ethtool_ops->set_rx_csum(dev, edata.data);
+ return 0;
+}
+
+static int ethtool_get_tx_csum(struct net_device *dev, char *useraddr)
+{
+ struct ethtool_value edata = { ETHTOOL_GTXCSUM };
+
+ if (!ethtool_ops->get_tx_csum)
+ return -EOPNOTSUPP;
+
+ edata.data = ethtool_ops->get_tx_csum(dev);
+
+ if (copy_to_user(useraddr, &edata, sizeof(edata)))
+ return -EFAULT;
+ return 0;
+}
+
+static int ethtool_set_tx_csum(struct net_device *dev, char *useraddr)
+{
+ struct ethtool_value edata;
+
+ if (!ethtool_ops->set_tx_csum)
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&edata, useraddr, sizeof(edata)))
+ return -EFAULT;
+
+ return ethtool_ops->set_tx_csum(dev, edata.data);
+}
+
+static int ethtool_get_sg(struct net_device *dev, char *useraddr)
+{
+ struct ethtool_value edata = { ETHTOOL_GSG };
+
+ if (!ethtool_ops->get_sg)
+ return -EOPNOTSUPP;
+
+ edata.data = ethtool_ops->get_sg(dev);
+
+ if (copy_to_user(useraddr, &edata, sizeof(edata)))
+ return -EFAULT;
+ return 0;
+}
+
+static int ethtool_set_sg(struct net_device *dev, char *useraddr)
+{
+ struct ethtool_value edata;
+
+ if (!ethtool_ops->set_sg)
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&edata, useraddr, sizeof(edata)))
+ return -EFAULT;
+
+ return ethtool_ops->set_sg(dev, edata.data);
+}
+
+static int ethtool_get_tso(struct net_device *dev, char *useraddr)
+{
+ struct ethtool_value edata = { ETHTOOL_GTSO };
+
+ if (!ethtool_ops->get_tso)
+ return -EOPNOTSUPP;
+
+ edata.data = ethtool_ops->get_tso(dev);
+
+ if (copy_to_user(useraddr, &edata, sizeof(edata)))
+ return -EFAULT;
+ return 0;
+}
+
+static int ethtool_set_tso(struct net_device *dev, char *useraddr)
+{
+ struct ethtool_value edata;
+
+ if (!ethtool_ops->set_tso)
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&edata, useraddr, sizeof(edata)))
+ return -EFAULT;
+
+ return ethtool_ops->set_tso(dev, edata.data);
+}
+
+static int ethtool_self_test(struct net_device *dev, char *useraddr)
+{
+ struct ethtool_test test;
+ struct ethtool_ops *ops = ethtool_ops;
+ u64 *data;
+ int ret;
+
+ if (!ops->self_test || !ops->self_test_count)
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&test, useraddr, sizeof(test)))
+ return -EFAULT;
+
+ test.len = ops->self_test_count(dev);
+ data = kmalloc(test.len * sizeof(u64), GFP_USER);
+ if (!data)
+ return -ENOMEM;
+
+ ops->self_test(dev, &test, data);
+
+ ret = -EFAULT;
+ if (copy_to_user(useraddr, &test, sizeof(test)))
+ goto out;
+ useraddr += sizeof(test);
+ if (copy_to_user(useraddr, data, test.len * sizeof(u64)))
+ goto out;
+ ret = 0;
+
+out:
+ kfree(data);
+ return ret;
+}
+
+static int ethtool_get_strings(struct net_device *dev, void *useraddr)
+{
+ struct ethtool_gstrings gstrings;
+ struct ethtool_ops *ops = ethtool_ops;
+ u8 *data;
+ int ret;
+
+ if (!ops->get_strings)
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&gstrings, useraddr, sizeof(gstrings)))
+ return -EFAULT;
+
+ switch (gstrings.string_set) {
+ case ETH_SS_TEST:
+ if (!ops->self_test_count)
+ return -EOPNOTSUPP;
+ gstrings.len = ops->self_test_count(dev);
+ break;
+ case ETH_SS_STATS:
+ if (!ops->get_stats_count)
+ return -EOPNOTSUPP;
+ gstrings.len = ops->get_stats_count(dev);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ data = kmalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER);
+ if (!data)
+ return -ENOMEM;
+
+ ops->get_strings(dev, gstrings.string_set, data);
+
+ ret = -EFAULT;
+ if (copy_to_user(useraddr, &gstrings, sizeof(gstrings)))
+ goto out;
+ useraddr += sizeof(gstrings);
+ if (copy_to_user(useraddr, data, gstrings.len * ETH_GSTRING_LEN))
+ goto out;
+ ret = 0;
+
+out:
+ kfree(data);
+ return ret;
+}
+
+static int ethtool_phys_id(struct net_device *dev, void *useraddr)
+{
+ struct ethtool_value id;
+
+ if (!ethtool_ops->phys_id)
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&id, useraddr, sizeof(id)))
+ return -EFAULT;
+
+ return ethtool_ops->phys_id(dev, id.data);
+}
+
+static int ethtool_get_stats(struct net_device *dev, void *useraddr)
+{
+ struct ethtool_stats stats;
+ struct ethtool_ops *ops = ethtool_ops;
+ u64 *data;
+ int ret;
+
+ if (!ops->get_ethtool_stats || !ops->get_stats_count)
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&stats, useraddr, sizeof(stats)))
+ return -EFAULT;
+
+ stats.n_stats = ops->get_stats_count(dev);
+ data = kmalloc(stats.n_stats * sizeof(u64), GFP_USER);
+ if (!data)
+ return -ENOMEM;
+
+ ops->get_ethtool_stats(dev, &stats, data);
+
+ ret = -EFAULT;
+ if (copy_to_user(useraddr, &stats, sizeof(stats)))
+ goto out;
+ useraddr += sizeof(stats);
+ if (copy_to_user(useraddr, data, stats.n_stats * sizeof(u64)))
+ goto out;
+ ret = 0;
+
+out:
+ kfree(data);
+ return ret;
+}
+
+/* The main entry point in this file. Called from net/core/dev.c */
+
+#define ETHTOOL_OPS_COMPAT
+int ethtool_ioctl(struct ifreq *ifr)
+{
+ struct net_device *dev = __dev_get_by_name(ifr->ifr_name);
+ void *useraddr = (void *) ifr->ifr_data;
+ u32 ethcmd;
+
+ /*
+ * XXX: This can be pushed down into the ethtool_* handlers that
+ * need it. Keep existing behavior for the moment.
+ */
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ if (!dev || !netif_device_present(dev))
+ return -ENODEV;
+
+ if (copy_from_user(&ethcmd, useraddr, sizeof (ethcmd)))
+ return -EFAULT;
+
+ switch (ethcmd) {
+ case ETHTOOL_GSET:
+ return ethtool_get_settings(dev, useraddr);
+ case ETHTOOL_SSET:
+ return ethtool_set_settings(dev, useraddr);
+ case ETHTOOL_GDRVINFO:
+ return ethtool_get_drvinfo(dev, useraddr);
+ case ETHTOOL_GREGS:
+ return ethtool_get_regs(dev, useraddr);
+ case ETHTOOL_GWOL:
+ return ethtool_get_wol(dev, useraddr);
+ case ETHTOOL_SWOL:
+ return ethtool_set_wol(dev, useraddr);
+ case ETHTOOL_GMSGLVL:
+ return ethtool_get_msglevel(dev, useraddr);
+ case ETHTOOL_SMSGLVL:
+ return ethtool_set_msglevel(dev, useraddr);
+ case ETHTOOL_NWAY_RST:
+ return ethtool_nway_reset(dev);
+ case ETHTOOL_GLINK:
+ return ethtool_get_link(dev, useraddr);
+ case ETHTOOL_GEEPROM:
+ return ethtool_get_eeprom(dev, useraddr);
+ case ETHTOOL_SEEPROM:
+ return ethtool_set_eeprom(dev, useraddr);
+ case ETHTOOL_GCOALESCE:
+ return ethtool_get_coalesce(dev, useraddr);
+ case ETHTOOL_SCOALESCE:
+ return ethtool_set_coalesce(dev, useraddr);
+ case ETHTOOL_GRINGPARAM:
+ return ethtool_get_ringparam(dev, useraddr);
+ case ETHTOOL_SRINGPARAM:
+ return ethtool_set_ringparam(dev, useraddr);
+ case ETHTOOL_GPAUSEPARAM:
+ return ethtool_get_pauseparam(dev, useraddr);
+ case ETHTOOL_SPAUSEPARAM:
+ return ethtool_set_pauseparam(dev, useraddr);
+ case ETHTOOL_GRXCSUM:
+ return ethtool_get_rx_csum(dev, useraddr);
+ case ETHTOOL_SRXCSUM:
+ return ethtool_set_rx_csum(dev, useraddr);
+ case ETHTOOL_GTXCSUM:
+ return ethtool_get_tx_csum(dev, useraddr);
+ case ETHTOOL_STXCSUM:
+ return ethtool_set_tx_csum(dev, useraddr);
+ case ETHTOOL_GSG:
+ return ethtool_get_sg(dev, useraddr);
+ case ETHTOOL_SSG:
+ return ethtool_set_sg(dev, useraddr);
+ case ETHTOOL_GTSO:
+ return ethtool_get_tso(dev, useraddr);
+ case ETHTOOL_STSO:
+ return ethtool_set_tso(dev, useraddr);
+ case ETHTOOL_TEST:
+ return ethtool_self_test(dev, useraddr);
+ case ETHTOOL_GSTRINGS:
+ return ethtool_get_strings(dev, useraddr);
+ case ETHTOOL_PHYS_ID:
+ return ethtool_phys_id(dev, useraddr);
+ case ETHTOOL_GSTATS:
+ return ethtool_get_stats(dev, useraddr);
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return -EOPNOTSUPP;
+}
+
+#define mii_if_info _kc_mii_if_info
+struct _kc_mii_if_info {
+ int phy_id;
+ int advertising;
+ int phy_id_mask;
+ int reg_num_mask;
+
+ unsigned int full_duplex : 1; /* is full duplex? */
+ unsigned int force_media : 1; /* is autoneg. disabled? */
+
+ struct net_device *dev;
+ int (*mdio_read) (struct net_device *dev, int phy_id, int location);
+ void (*mdio_write) (struct net_device *dev, int phy_id, int location, int val);
+};
+
+struct ethtool_cmd;
+struct mii_ioctl_data;
+
+#undef mii_link_ok
+#define mii_link_ok _kc_mii_link_ok
+#undef mii_nway_restart
+#define mii_nway_restart _kc_mii_nway_restart
+#undef mii_ethtool_gset
+#define mii_ethtool_gset _kc_mii_ethtool_gset
+#undef mii_ethtool_sset
+#define mii_ethtool_sset _kc_mii_ethtool_sset
+#undef mii_check_link
+#define mii_check_link _kc_mii_check_link
+extern int _kc_mii_link_ok (struct mii_if_info *mii);
+extern int _kc_mii_nway_restart (struct mii_if_info *mii);
+extern int _kc_mii_ethtool_gset(struct mii_if_info *mii,
+ struct ethtool_cmd *ecmd);
+extern int _kc_mii_ethtool_sset(struct mii_if_info *mii,
+ struct ethtool_cmd *ecmd);
+extern void _kc_mii_check_link (struct mii_if_info *mii);
+#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,4,6) )
+#undef generic_mii_ioctl
+#define generic_mii_ioctl _kc_generic_mii_ioctl
+extern int _kc_generic_mii_ioctl(struct mii_if_info *mii_if,
+ struct mii_ioctl_data *mii_data, int cmd,
+ unsigned int *duplex_changed);
+#endif /* > 2.4.6 */
+
+
+struct _kc_pci_dev_ext {
+ struct pci_dev *dev;
+ void *pci_drvdata;
+ struct pci_driver *driver;
+};
+
+struct _kc_net_dev_ext {
+ struct net_device *dev;
+ unsigned int carrier;
+};
+
+
+/**************************************/
+/* mii support */
+
+int _kc_mii_ethtool_gset(struct mii_if_info *mii, struct ethtool_cmd *ecmd)
+{
+ struct net_device *dev = mii->dev;
+ u32 advert, bmcr, lpa, nego;
+
+ ecmd->supported =
+ (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full |
+ SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full |
+ SUPPORTED_Autoneg | SUPPORTED_TP | SUPPORTED_MII);
+
+ /* only supports twisted-pair */
+ ecmd->port = PORT_MII;
+
+ /* only supports internal transceiver */
+ ecmd->transceiver = XCVR_INTERNAL;
+
+ /* this isn't fully supported at higher layers */
+ ecmd->phy_address = mii->phy_id;
+
+ ecmd->advertising = ADVERTISED_TP | ADVERTISED_MII;
+ advert = mii->mdio_read(dev, mii->phy_id, MII_ADVERTISE);
+ if (advert & ADVERTISE_10HALF)
+ ecmd->advertising |= ADVERTISED_10baseT_Half;
+ if (advert & ADVERTISE_10FULL)
+ ecmd->advertising |= ADVERTISED_10baseT_Full;
+ if (advert & ADVERTISE_100HALF)
+ ecmd->advertising |= ADVERTISED_100baseT_Half;
+ if (advert & ADVERTISE_100FULL)
+ ecmd->advertising |= ADVERTISED_100baseT_Full;
+
+ bmcr = mii->mdio_read(dev, mii->phy_id, MII_BMCR);
+ lpa = mii->mdio_read(dev, mii->phy_id, MII_LPA);
+ if (bmcr & BMCR_ANENABLE) {
+ ecmd->advertising |= ADVERTISED_Autoneg;
+ ecmd->autoneg = AUTONEG_ENABLE;
+
+ nego = mii_nway_result(advert & lpa);
+ if (nego == LPA_100FULL || nego == LPA_100HALF)
+ ecmd->speed = SPEED_100;
+ else
+ ecmd->speed = SPEED_10;
+ if (nego == LPA_100FULL || nego == LPA_10FULL) {
+ ecmd->duplex = DUPLEX_FULL;
+ mii->full_duplex = 1;
+ } else {
+ ecmd->duplex = DUPLEX_HALF;
+ mii->full_duplex = 0;
+ }
+ } else {
+ ecmd->autoneg = AUTONEG_DISABLE;
+
+ ecmd->speed = (bmcr & BMCR_SPEED100) ? SPEED_100 : SPEED_10;
+ ecmd->duplex = (bmcr & BMCR_FULLDPLX) ? DUPLEX_FULL : DUPLEX_HALF;
+ }
+
+ /* ignore maxtxpkt, maxrxpkt for now */
+
+ return 0;
+}
+
+int _kc_mii_ethtool_sset(struct mii_if_info *mii, struct ethtool_cmd *ecmd)
+{
+ struct net_device *dev = mii->dev;
+
+ if (ecmd->speed != SPEED_10 && ecmd->speed != SPEED_100)
+ return -EINVAL;
+ if (ecmd->duplex != DUPLEX_HALF && ecmd->duplex != DUPLEX_FULL)
+ return -EINVAL;
+ if (ecmd->port != PORT_MII)
+ return -EINVAL;
+ if (ecmd->transceiver != XCVR_INTERNAL)
+ return -EINVAL;
+ if (ecmd->phy_address != mii->phy_id)
+ return -EINVAL;
+ if (ecmd->autoneg != AUTONEG_DISABLE && ecmd->autoneg != AUTONEG_ENABLE)
+ return -EINVAL;
+
+ /* ignore supported, maxtxpkt, maxrxpkt */
+
+ if (ecmd->autoneg == AUTONEG_ENABLE) {
+ u32 bmcr, advert, tmp;
+
+ if ((ecmd->advertising & (ADVERTISED_10baseT_Half |
+ ADVERTISED_10baseT_Full |
+ ADVERTISED_100baseT_Half |
+ ADVERTISED_100baseT_Full)) == 0)
+ return -EINVAL;
+
+ /* advertise only what has been requested */
+ advert = mii->mdio_read(dev, mii->phy_id, MII_ADVERTISE);
+ tmp = advert & ~(ADVERTISE_ALL | ADVERTISE_100BASE4);
+ if (ADVERTISED_10baseT_Half)
+ tmp |= ADVERTISE_10HALF;
+ if (ADVERTISED_10baseT_Full)
+ tmp |= ADVERTISE_10FULL;
+ if (ADVERTISED_100baseT_Half)
+ tmp |= ADVERTISE_100HALF;
+ if (ADVERTISED_100baseT_Full)
+ tmp |= ADVERTISE_100FULL;
+ if (advert != tmp) {
+ mii->mdio_write(dev, mii->phy_id, MII_ADVERTISE, tmp);
+ mii->advertising = tmp;
+ }
+
+ /* turn on autonegotiation, and force a renegotiate */
+ bmcr = mii->mdio_read(dev, mii->phy_id, MII_BMCR);
+ bmcr |= (BMCR_ANENABLE | BMCR_ANRESTART);
+ mii->mdio_write(dev, mii->phy_id, MII_BMCR, bmcr);
+
+ mii->force_media = 0;
+ } else {
+ u32 bmcr, tmp;
+
+ /* turn off auto negotiation, set speed and duplexity */
+ bmcr = mii->mdio_read(dev, mii->phy_id, MII_BMCR);
+ tmp = bmcr & ~(BMCR_ANENABLE | BMCR_SPEED100 | BMCR_FULLDPLX);
+ if (ecmd->speed == SPEED_100)
+ tmp |= BMCR_SPEED100;
+ if (ecmd->duplex == DUPLEX_FULL) {
+ tmp |= BMCR_FULLDPLX;
+ mii->full_duplex = 1;
+ } else
+ mii->full_duplex = 0;
+ if (bmcr != tmp)
+ mii->mdio_write(dev, mii->phy_id, MII_BMCR, tmp);
+
+ mii->force_media = 1;
+ }
+ return 0;
+}
+
+int _kc_mii_link_ok (struct mii_if_info *mii)
+{
+ /* first, a dummy read, needed to latch some MII phys */
+ mii->mdio_read(mii->dev, mii->phy_id, MII_BMSR);
+ if (mii->mdio_read(mii->dev, mii->phy_id, MII_BMSR) & BMSR_LSTATUS)
+ return 1;
+ return 0;
+}
+
+int _kc_mii_nway_restart (struct mii_if_info *mii)
+{
+ int bmcr;
+ int r = -EINVAL;
+
+ /* if autoneg is off, it's an error */
+ bmcr = mii->mdio_read(mii->dev, mii->phy_id, MII_BMCR);
+
+ if (bmcr & BMCR_ANENABLE) {
+ bmcr |= BMCR_ANRESTART;
+ mii->mdio_write(mii->dev, mii->phy_id, MII_BMCR, bmcr);
+ r = 0;
+ }
+
+ return r;
+}
+
+void _kc_mii_check_link (struct mii_if_info *mii)
+{
+ int cur_link = mii_link_ok(mii);
+ int prev_link = netif_carrier_ok(mii->dev);
+
+ if (cur_link && !prev_link)
+ netif_carrier_on(mii->dev);
+ else if (prev_link && !cur_link)
+ netif_carrier_off(mii->dev);
+}
+
+#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,4,6) )
+int _kc_generic_mii_ioctl(struct mii_if_info *mii_if,
+ struct mii_ioctl_data *mii_data, int cmd,
+ unsigned int *duplex_chg_out)
+{
+ int rc = 0;
+ unsigned int duplex_changed = 0;
+
+ if (duplex_chg_out)
+ *duplex_chg_out = 0;
+
+ mii_data->phy_id &= mii_if->phy_id_mask;
+ mii_data->reg_num &= mii_if->reg_num_mask;
+
+ switch(cmd) {
+ case SIOCDEVPRIVATE: /* binary compat, remove in 2.5 */
+ case SIOCGMIIPHY:
+ mii_data->phy_id = mii_if->phy_id;
+ /* fall through */
+
+ case SIOCDEVPRIVATE + 1:/* binary compat, remove in 2.5 */
+ case SIOCGMIIREG:
+ mii_data->val_out =
+ mii_if->mdio_read(mii_if->dev, mii_data->phy_id,
+ mii_data->reg_num);
+ break;
+
+ case SIOCDEVPRIVATE + 2:/* binary compat, remove in 2.5 */
+ case SIOCSMIIREG: {
+ u16 val = mii_data->val_in;
+
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ if (mii_data->phy_id == mii_if->phy_id) {
+ switch(mii_data->reg_num) {
+ case MII_BMCR: {
+ unsigned int new_duplex = 0;
+ if (val & (BMCR_RESET|BMCR_ANENABLE))
+ mii_if->force_media = 0;
+ else
+ mii_if->force_media = 1;
+ if (mii_if->force_media &&
+ (val & BMCR_FULLDPLX))
+ new_duplex = 1;
+ if (mii_if->full_duplex != new_duplex) {
+ duplex_changed = 1;
+ mii_if->full_duplex = new_duplex;
+ }
+ break;
+ }
+ case MII_ADVERTISE:
+ mii_if->advertising = val;
+ break;
+ default:
+ /* do nothing */
+ break;
+ }
+ }
+
+ mii_if->mdio_write(mii_if->dev, mii_data->phy_id,
+ mii_data->reg_num, val);
+ break;
+ }
+
+ default:
+ rc = -EOPNOTSUPP;
+ break;
+ }
+
+ if ((rc == 0) && (duplex_chg_out) && (duplex_changed))
+ *duplex_chg_out = 1;
+
+ return rc;
+}
+#endif /* > 2.4.6 */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/COPYING b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/COPYING
new file mode 100644
index 00000000..5f297e5b
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/COPYING
@@ -0,0 +1,339 @@
+
+"This software program is licensed subject to the GNU General Public License
+(GPL). Version 2, June 1991, available at
+<http://www.fsf.org/copyleft/gpl.html>"
+
+GNU General Public License
+
+Version 2, June 1991
+
+Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+
+Everyone is permitted to copy and distribute verbatim copies of this license
+document, but changing it is not allowed.
+
+Preamble
+
+The licenses for most software are designed to take away your freedom to
+share and change it. By contrast, the GNU General Public License is intended
+to guarantee your freedom to share and change free software--to make sure
+the software is free for all its users. This General Public License applies
+to most of the Free Software Foundation's software and to any other program
+whose authors commit to using it. (Some other Free Software Foundation
+software is covered by the GNU Library General Public License instead.) You
+can apply it to your programs, too.
+
+When we speak of free software, we are referring to freedom, not price. Our
+General Public Licenses are designed to make sure that you have the freedom
+to distribute copies of free software (and charge for this service if you
+wish), that you receive source code or can get it if you want it, that you
+can change the software or use pieces of it in new free programs; and that
+you know you can do these things.
+
+To protect your rights, we need to make restrictions that forbid anyone to
+deny you these rights or to ask you to surrender the rights. These
+restrictions translate to certain responsibilities for you if you distribute
+copies of the software, or if you modify it.
+
+For example, if you distribute copies of such a program, whether gratis or
+for a fee, you must give the recipients all the rights that you have. You
+must make sure that they, too, receive or can get the source code. And you
+must show them these terms so they know their rights.
+
+We protect your rights with two steps: (1) copyright the software, and (2)
+offer you this license which gives you legal permission to copy, distribute
+and/or modify the software.
+
+Also, for each author's protection and ours, we want to make certain that
+everyone understands that there is no warranty for this free software. If
+the software is modified by someone else and passed on, we want its
+recipients to know that what they have is not the original, so that any
+problems introduced by others will not reflect on the original authors'
+reputations.
+
+Finally, any free program is threatened constantly by software patents. We
+wish to avoid the danger that redistributors of a free program will
+individually obtain patent licenses, in effect making the program
+proprietary. To prevent this, we have made it clear that any patent must be
+licensed for everyone's free use or not licensed at all.
+
+The precise terms and conditions for copying, distribution and modification
+follow.
+
+TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+0. This License applies to any program or other work which contains a notice
+ placed by the copyright holder saying it may be distributed under the
+ terms of this General Public License. The "Program", below, refers to any
+ such program or work, and a "work based on the Program" means either the
+ Program or any derivative work under copyright law: that is to say, a
+ work containing the Program or a portion of it, either verbatim or with
+ modifications and/or translated into another language. (Hereinafter,
+ translation is included without limitation in the term "modification".)
+ Each licensee is addressed as "you".
+
+ Activities other than copying, distribution and modification are not
+ covered by this License; they are outside its scope. The act of running
+ the Program is not restricted, and the output from the Program is covered
+ only if its contents constitute a work based on the Program (independent
+ of having been made by running the Program). Whether that is true depends
+ on what the Program does.
+
+1. You may copy and distribute verbatim copies of the Program's source code
+ as you receive it, in any medium, provided that you conspicuously and
+ appropriately publish on each copy an appropriate copyright notice and
+ disclaimer of warranty; keep intact all the notices that refer to this
+ License and to the absence of any warranty; and give any other recipients
+ of the Program a copy of this License along with the Program.
+
+ You may charge a fee for the physical act of transferring a copy, and you
+ may at your option offer warranty protection in exchange for a fee.
+
+2. You may modify your copy or copies of the Program or any portion of it,
+ thus forming a work based on the Program, and copy and distribute such
+ modifications or work under the terms of Section 1 above, provided that
+ you also meet all of these conditions:
+
+ * a) You must cause the modified files to carry prominent notices stating
+ that you changed the files and the date of any change.
+
+ * b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any part
+ thereof, to be licensed as a whole at no charge to all third parties
+ under the terms of this License.
+
+ * c) If the modified program normally reads commands interactively when
+ run, you must cause it, when started running for such interactive
+ use in the most ordinary way, to print or display an announcement
+ including an appropriate copyright notice and a notice that there is
+ no warranty (or else, saying that you provide a warranty) and that
+ users may redistribute the program under these conditions, and
+ telling the user how to view a copy of this License. (Exception: if
+ the Program itself is interactive but does not normally print such
+ an announcement, your work based on the Program is not required to
+ print an announcement.)
+
+ These requirements apply to the modified work as a whole. If identifiable
+ sections of that work are not derived from the Program, and can be
+ reasonably considered independent and separate works in themselves, then
+ this License, and its terms, do not apply to those sections when you
+ distribute them as separate works. But when you distribute the same
+ sections as part of a whole which is a work based on the Program, the
+ distribution of the whole must be on the terms of this License, whose
+ permissions for other licensees extend to the entire whole, and thus to
+ each and every part regardless of who wrote it.
+
+ Thus, it is not the intent of this section to claim rights or contest
+ your rights to work written entirely by you; rather, the intent is to
+ exercise the right to control the distribution of derivative or
+ collective works based on the Program.
+
+ In addition, mere aggregation of another work not based on the Program
+ with the Program (or with a work based on the Program) on a volume of a
+ storage or distribution medium does not bring the other work under the
+ scope of this License.
+
+3. You may copy and distribute the Program (or a work based on it, under
+ Section 2) in object code or executable form under the terms of Sections
+ 1 and 2 above provided that you also do one of the following:
+
+ * a) Accompany it with the complete corresponding machine-readable source
+ code, which must be distributed under the terms of Sections 1 and 2
+ above on a medium customarily used for software interchange; or,
+
+ * b) Accompany it with a written offer, valid for at least three years,
+ to give any third party, for a charge no more than your cost of
+ physically performing source distribution, a complete machine-
+ readable copy of the corresponding source code, to be distributed
+ under the terms of Sections 1 and 2 above on a medium customarily
+ used for software interchange; or,
+
+ * c) Accompany it with the information you received as to the offer to
+ distribute corresponding source code. (This alternative is allowed
+ only for noncommercial distribution and only if you received the
+ program in object code or executable form with such an offer, in
+ accord with Subsection b above.)
+
+ The source code for a work means the preferred form of the work for
+ making modifications to it. For an executable work, complete source code
+ means all the source code for all modules it contains, plus any
+ associated interface definition files, plus the scripts used to control
+ compilation and installation of the executable. However, as a special
+ exception, the source code distributed need not include anything that is
+ normally distributed (in either source or binary form) with the major
+ components (compiler, kernel, and so on) of the operating system on which
+ the executable runs, unless that component itself accompanies the
+ executable.
+
+ If distribution of executable or object code is made by offering access
+ to copy from a designated place, then offering equivalent access to copy
+ the source code from the same place counts as distribution of the source
+ code, even though third parties are not compelled to copy the source
+ along with the object code.
+
+4. You may not copy, modify, sublicense, or distribute the Program except as
+ expressly provided under this License. Any attempt otherwise to copy,
+ modify, sublicense or distribute the Program is void, and will
+ automatically terminate your rights under this License. However, parties
+ who have received copies, or rights, from you under this License will not
+ have their licenses terminated so long as such parties remain in full
+ compliance.
+
+5. You are not required to accept this License, since you have not signed
+ it. However, nothing else grants you permission to modify or distribute
+ the Program or its derivative works. These actions are prohibited by law
+ if you do not accept this License. Therefore, by modifying or
+ distributing the Program (or any work based on the Program), you
+ indicate your acceptance of this License to do so, and all its terms and
+ conditions for copying, distributing or modifying the Program or works
+ based on it.
+
+6. Each time you redistribute the Program (or any work based on the
+ Program), the recipient automatically receives a license from the
+ original licensor to copy, distribute or modify the Program subject to
+ these terms and conditions. You may not impose any further restrictions
+ on the recipients' exercise of the rights granted herein. You are not
+ responsible for enforcing compliance by third parties to this License.
+
+7. If, as a consequence of a court judgment or allegation of patent
+ infringement or for any other reason (not limited to patent issues),
+ conditions are imposed on you (whether by court order, agreement or
+ otherwise) that contradict the conditions of this License, they do not
+ excuse you from the conditions of this License. If you cannot distribute
+ so as to satisfy simultaneously your obligations under this License and
+ any other pertinent obligations, then as a consequence you may not
+ distribute the Program at all. For example, if a patent license would
+ not permit royalty-free redistribution of the Program by all those who
+ receive copies directly or indirectly through you, then the only way you
+ could satisfy both it and this License would be to refrain entirely from
+ distribution of the Program.
+
+ If any portion of this section is held invalid or unenforceable under any
+ particular circumstance, the balance of the section is intended to apply
+ and the section as a whole is intended to apply in other circumstances.
+
+ It is not the purpose of this section to induce you to infringe any
+ patents or other property right claims or to contest validity of any
+ such claims; this section has the sole purpose of protecting the
+ integrity of the free software distribution system, which is implemented
+ by public license practices. Many people have made generous contributions
+ to the wide range of software distributed through that system in
+ reliance on consistent application of that system; it is up to the
+ author/donor to decide if he or she is willing to distribute software
+ through any other system and a licensee cannot impose that choice.
+
+ This section is intended to make thoroughly clear what is believed to be
+ a consequence of the rest of this License.
+
+8. If the distribution and/or use of the Program is restricted in certain
+ countries either by patents or by copyrighted interfaces, the original
+ copyright holder who places the Program under this License may add an
+ explicit geographical distribution limitation excluding those countries,
+ so that distribution is permitted only in or among countries not thus
+ excluded. In such case, this License incorporates the limitation as if
+ written in the body of this License.
+
+9. The Free Software Foundation may publish revised and/or new versions of
+ the General Public License from time to time. Such new versions will be
+ similar in spirit to the present version, but may differ in detail to
+ address new problems or concerns.
+
+ Each version is given a distinguishing version number. If the Program
+ specifies a version number of this License which applies to it and "any
+ later version", you have the option of following the terms and
+ conditions either of that version or of any later version published by
+ the Free Software Foundation. If the Program does not specify a version
+ number of this License, you may choose any version ever published by the
+ Free Software Foundation.
+
+10. If you wish to incorporate parts of the Program into other free programs
+ whose distribution conditions are different, write to the author to ask
+ for permission. For software which is copyrighted by the Free Software
+ Foundation, write to the Free Software Foundation; we sometimes make
+ exceptions for this. Our decision will be guided by the two goals of
+ preserving the free status of all derivatives of our free software and
+ of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+ FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+ OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+ PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER
+ EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
+ ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH
+ YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL
+ NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+ WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+ REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR
+ DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL
+ DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM
+ (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED
+ INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF
+ THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR
+ OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+END OF TERMS AND CONDITIONS
+
+How to Apply These Terms to Your New Programs
+
+If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it free
+software which everyone can redistribute and change under these terms.
+
+To do so, attach the following notices to the program. It is safest to
+attach them to the start of each source file to most effectively convey the
+exclusion of warranty; and each file should have at least the "copyright"
+line and a pointer to where the full notice is found.
+
+one line to give the program's name and an idea of what it does.
+Copyright (C) yyyy name of author
+
+This program is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2 of the License, or (at your option)
+any later version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59
+Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this when
+it starts in an interactive mode:
+
+Gnomovision version 69, Copyright (C) year name of author Gnomovision comes
+with ABSOLUTELY NO WARRANTY; for details type 'show w'. This is free
+software, and you are welcome to redistribute it under certain conditions;
+type 'show c' for details.
+
+The hypothetical commands 'show w' and 'show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may be
+called something other than 'show w' and 'show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+'Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+signature of Ty Coon, 1 April 1989
+Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Library General Public
+License instead of this License.
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe.h
new file mode 100644
index 00000000..222c2c71
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe.h
@@ -0,0 +1,925 @@
+/*******************************************************************************
+
+ Intel 10 Gigabit PCI Express Linux driver
+ Copyright(c) 1999 - 2012 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _IXGBE_H_
+#define _IXGBE_H_
+
+#ifndef IXGBE_NO_LRO
+#include <net/tcp.h>
+#endif
+
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#ifdef HAVE_IRQ_AFFINITY_HINT
+#include <linux/cpumask.h>
+#endif /* HAVE_IRQ_AFFINITY_HINT */
+#include <linux/vmalloc.h>
+
+#ifdef SIOCETHTOOL
+#include <linux/ethtool.h>
+#endif
+#ifdef NETIF_F_HW_VLAN_TX
+#include <linux/if_vlan.h>
+#endif
+#if defined(CONFIG_DCA) || defined(CONFIG_DCA_MODULE)
+#define IXGBE_DCA
+#include <linux/dca.h>
+#endif
+#include "ixgbe_dcb.h"
+
+#include "kcompat.h"
+
+#ifdef HAVE_SCTP
+#include <linux/sctp.h>
+#endif
+
+#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
+#define IXGBE_FCOE
+#include "ixgbe_fcoe.h"
+#endif /* CONFIG_FCOE or CONFIG_FCOE_MODULE */
+
+#if defined(CONFIG_PTP_1588_CLOCK) || defined(CONFIG_PTP_1588_CLOCK_MODULE)
+#define HAVE_IXGBE_PTP
+#endif
+
+#include "ixgbe_api.h"
+
+#define PFX "ixgbe: "
+#define DPRINTK(nlevel, klevel, fmt, args...) \
+ ((void)((NETIF_MSG_##nlevel & adapter->msg_enable) && \
+ printk(KERN_##klevel PFX "%s: %s: " fmt, adapter->netdev->name, \
+ __func__ , ## args)))
+
+/* TX/RX descriptor defines */
+#define IXGBE_DEFAULT_TXD 512
+#define IXGBE_DEFAULT_TX_WORK 256
+#define IXGBE_MAX_TXD 4096
+#define IXGBE_MIN_TXD 64
+
+#define IXGBE_DEFAULT_RXD 512
+#define IXGBE_DEFAULT_RX_WORK 256
+#define IXGBE_MAX_RXD 4096
+#define IXGBE_MIN_RXD 64
+
+
+/* flow control */
+#define IXGBE_MIN_FCRTL 0x40
+#define IXGBE_MAX_FCRTL 0x7FF80
+#define IXGBE_MIN_FCRTH 0x600
+#define IXGBE_MAX_FCRTH 0x7FFF0
+#define IXGBE_DEFAULT_FCPAUSE 0xFFFF
+#define IXGBE_MIN_FCPAUSE 0
+#define IXGBE_MAX_FCPAUSE 0xFFFF
+
+/* Supported Rx Buffer Sizes */
+#define IXGBE_RXBUFFER_512 512 /* Used for packet split */
+#ifdef CONFIG_IXGBE_DISABLE_PACKET_SPLIT
+#define IXGBE_RXBUFFER_1536 1536
+#define IXGBE_RXBUFFER_2K 2048
+#define IXGBE_RXBUFFER_3K 3072
+#define IXGBE_RXBUFFER_4K 4096
+#define IXGBE_RXBUFFER_7K 7168
+#define IXGBE_RXBUFFER_8K 8192
+#define IXGBE_RXBUFFER_15K 15360
+#endif /* CONFIG_IXGBE_DISABLE_PACKET_SPLIT */
+#define IXGBE_MAX_RXBUFFER 16384 /* largest size for single descriptor */
+
+/*
+ * NOTE: netdev_alloc_skb reserves up to 64 bytes, NET_IP_ALIGN mans we
+ * reserve 2 more, and skb_shared_info adds an additional 384 bytes more,
+ * this adds up to 512 bytes of extra data meaning the smallest allocation
+ * we could have is 1K.
+ * i.e. RXBUFFER_512 --> size-1024 slab
+ */
+#define IXGBE_RX_HDR_SIZE IXGBE_RXBUFFER_512
+
+#define MAXIMUM_ETHERNET_VLAN_SIZE (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN)
+
+/* How many Rx Buffers do we bundle into one write to the hardware ? */
+#define IXGBE_RX_BUFFER_WRITE 16 /* Must be power of 2 */
+
+#define IXGBE_TX_FLAGS_CSUM (u32)(1)
+#define IXGBE_TX_FLAGS_HW_VLAN (u32)(1 << 1)
+#define IXGBE_TX_FLAGS_SW_VLAN (u32)(1 << 2)
+#define IXGBE_TX_FLAGS_TSO (u32)(1 << 3)
+#define IXGBE_TX_FLAGS_IPV4 (u32)(1 << 4)
+#define IXGBE_TX_FLAGS_FCOE (u32)(1 << 5)
+#define IXGBE_TX_FLAGS_FSO (u32)(1 << 6)
+#define IXGBE_TX_FLAGS_TXSW (u32)(1 << 7)
+#define IXGBE_TX_FLAGS_TSTAMP (u32)(1 << 8)
+#define IXGBE_TX_FLAGS_VLAN_MASK 0xffff0000
+#define IXGBE_TX_FLAGS_VLAN_PRIO_MASK 0xe0000000
+#define IXGBE_TX_FLAGS_VLAN_PRIO_SHIFT 29
+#define IXGBE_TX_FLAGS_VLAN_SHIFT 16
+
+#define IXGBE_MAX_RX_DESC_POLL 10
+
+#define IXGBE_MAX_VF_MC_ENTRIES 30
+#define IXGBE_MAX_VF_FUNCTIONS 64
+#define IXGBE_MAX_VFTA_ENTRIES 128
+#define MAX_EMULATION_MAC_ADDRS 16
+#define IXGBE_MAX_PF_MACVLANS 15
+#define IXGBE_82599_VF_DEVICE_ID 0x10ED
+#define IXGBE_X540_VF_DEVICE_ID 0x1515
+
+#ifdef CONFIG_PCI_IOV
+#define VMDQ_P(p) ((p) + adapter->num_vfs)
+#else
+#define VMDQ_P(p) (p)
+#endif
+
+#define UPDATE_VF_COUNTER_32bit(reg, last_counter, counter) \
+ { \
+ u32 current_counter = IXGBE_READ_REG(hw, reg); \
+ if (current_counter < last_counter) \
+ counter += 0x100000000LL; \
+ last_counter = current_counter; \
+ counter &= 0xFFFFFFFF00000000LL; \
+ counter |= current_counter; \
+ }
+
+#define UPDATE_VF_COUNTER_36bit(reg_lsb, reg_msb, last_counter, counter) \
+ { \
+ u64 current_counter_lsb = IXGBE_READ_REG(hw, reg_lsb); \
+ u64 current_counter_msb = IXGBE_READ_REG(hw, reg_msb); \
+ u64 current_counter = (current_counter_msb << 32) | \
+ current_counter_lsb; \
+ if (current_counter < last_counter) \
+ counter += 0x1000000000LL; \
+ last_counter = current_counter; \
+ counter &= 0xFFFFFFF000000000LL; \
+ counter |= current_counter; \
+ }
+
+struct vf_stats {
+ u64 gprc;
+ u64 gorc;
+ u64 gptc;
+ u64 gotc;
+ u64 mprc;
+};
+
+struct vf_data_storage {
+ unsigned char vf_mac_addresses[ETH_ALEN];
+ u16 vf_mc_hashes[IXGBE_MAX_VF_MC_ENTRIES];
+ u16 num_vf_mc_hashes;
+ u16 default_vf_vlan_id;
+ u16 vlans_enabled;
+ bool clear_to_send;
+ struct vf_stats vfstats;
+ struct vf_stats last_vfstats;
+ struct vf_stats saved_rst_vfstats;
+ bool pf_set_mac;
+ u16 pf_vlan; /* When set, guest VLAN config not allowed. */
+ u16 pf_qos;
+ u16 tx_rate;
+ u16 vlan_count;
+ u8 spoofchk_enabled;
+ struct pci_dev *vfdev;
+};
+
+struct vf_macvlans {
+ struct list_head l;
+ int vf;
+ bool free;
+ bool is_macvlan;
+ u8 vf_macvlan[ETH_ALEN];
+};
+
+#ifndef IXGBE_NO_LRO
+#define IXGBE_LRO_MAX 32 /*Maximum number of LRO descriptors*/
+#define IXGBE_LRO_GLOBAL 10
+
+struct ixgbe_lro_stats {
+ u32 flushed;
+ u32 coal;
+};
+
+/*
+ * ixgbe_lro_header - header format to be aggregated by LRO
+ * @iph: IP header without options
+ * @tcp: TCP header
+ * @ts: Optional TCP timestamp data in TCP options
+ *
+ * This structure relies on the check above that verifies that the header
+ * is IPv4 and does not contain any options.
+ */
+struct ixgbe_lrohdr {
+ struct iphdr iph;
+ struct tcphdr th;
+ __be32 ts[0];
+};
+
+struct ixgbe_lro_list {
+ struct sk_buff_head active;
+ struct ixgbe_lro_stats stats;
+};
+
+#endif /* IXGBE_NO_LRO */
+#define IXGBE_MAX_TXD_PWR 14
+#define IXGBE_MAX_DATA_PER_TXD (1 << IXGBE_MAX_TXD_PWR)
+
+/* Tx Descriptors needed, worst case */
+#define TXD_USE_COUNT(S) DIV_ROUND_UP((S), IXGBE_MAX_DATA_PER_TXD)
+#ifdef MAX_SKB_FRAGS
+#define DESC_NEEDED ((MAX_SKB_FRAGS * TXD_USE_COUNT(PAGE_SIZE)) + 4)
+#else
+#define DESC_NEEDED 4
+#endif
+
+/* wrapper around a pointer to a socket buffer,
+ * so a DMA handle can be stored along with the buffer */
+struct ixgbe_tx_buffer {
+ union ixgbe_adv_tx_desc *next_to_watch;
+ unsigned long time_stamp;
+ struct sk_buff *skb;
+ unsigned int bytecount;
+ unsigned short gso_segs;
+ __be16 protocol;
+ DEFINE_DMA_UNMAP_ADDR(dma);
+ DEFINE_DMA_UNMAP_LEN(len);
+ u32 tx_flags;
+};
+
+struct ixgbe_rx_buffer {
+ struct sk_buff *skb;
+ dma_addr_t dma;
+#ifndef CONFIG_IXGBE_DISABLE_PACKET_SPLIT
+ struct page *page;
+ unsigned int page_offset;
+#endif
+};
+
+struct ixgbe_queue_stats {
+ u64 packets;
+ u64 bytes;
+};
+
+struct ixgbe_tx_queue_stats {
+ u64 restart_queue;
+ u64 tx_busy;
+ u64 tx_done_old;
+};
+
+struct ixgbe_rx_queue_stats {
+ u64 rsc_count;
+ u64 rsc_flush;
+ u64 non_eop_descs;
+ u64 alloc_rx_page_failed;
+ u64 alloc_rx_buff_failed;
+ u64 csum_err;
+};
+
+enum ixgbe_ring_state_t {
+ __IXGBE_TX_FDIR_INIT_DONE,
+ __IXGBE_TX_DETECT_HANG,
+ __IXGBE_HANG_CHECK_ARMED,
+ __IXGBE_RX_RSC_ENABLED,
+#ifndef HAVE_NDO_SET_FEATURES
+ __IXGBE_RX_CSUM_ENABLED,
+#endif
+ __IXGBE_RX_CSUM_UDP_ZERO_ERR,
+#ifdef IXGBE_FCOE
+ __IXGBE_RX_FCOE_BUFSZ,
+#endif
+};
+
+#define check_for_tx_hang(ring) \
+ test_bit(__IXGBE_TX_DETECT_HANG, &(ring)->state)
+#define set_check_for_tx_hang(ring) \
+ set_bit(__IXGBE_TX_DETECT_HANG, &(ring)->state)
+#define clear_check_for_tx_hang(ring) \
+ clear_bit(__IXGBE_TX_DETECT_HANG, &(ring)->state)
+#ifndef IXGBE_NO_HW_RSC
+#define ring_is_rsc_enabled(ring) \
+ test_bit(__IXGBE_RX_RSC_ENABLED, &(ring)->state)
+#else
+#define ring_is_rsc_enabled(ring) false
+#endif
+#define set_ring_rsc_enabled(ring) \
+ set_bit(__IXGBE_RX_RSC_ENABLED, &(ring)->state)
+#define clear_ring_rsc_enabled(ring) \
+ clear_bit(__IXGBE_RX_RSC_ENABLED, &(ring)->state)
+#define netdev_ring(ring) (ring->netdev)
+#define ring_queue_index(ring) (ring->queue_index)
+
+
+struct ixgbe_ring {
+ struct ixgbe_ring *next; /* pointer to next ring in q_vector */
+ struct ixgbe_q_vector *q_vector; /* backpointer to host q_vector */
+ struct net_device *netdev; /* netdev ring belongs to */
+ struct device *dev; /* device for DMA mapping */
+ void *desc; /* descriptor ring memory */
+ union {
+ struct ixgbe_tx_buffer *tx_buffer_info;
+ struct ixgbe_rx_buffer *rx_buffer_info;
+ };
+ unsigned long state;
+ u8 __iomem *tail;
+ dma_addr_t dma; /* phys. address of descriptor ring */
+ unsigned int size; /* length in bytes */
+
+ u16 count; /* amount of descriptors */
+
+ u8 queue_index; /* needed for multiqueue queue management */
+ u8 reg_idx; /* holds the special value that gets
+ * the hardware register offset
+ * associated with this ring, which is
+ * different for DCB and RSS modes
+ */
+ u16 next_to_use;
+ u16 next_to_clean;
+
+ union {
+#ifdef CONFIG_IXGBE_DISABLE_PACKET_SPLIT
+ u16 rx_buf_len;
+#else
+ u16 next_to_alloc;
+#endif
+ struct {
+ u8 atr_sample_rate;
+ u8 atr_count;
+ };
+ };
+
+ u8 dcb_tc;
+ struct ixgbe_queue_stats stats;
+ union {
+ struct ixgbe_tx_queue_stats tx_stats;
+ struct ixgbe_rx_queue_stats rx_stats;
+ };
+} ____cacheline_internodealigned_in_smp;
+
+enum ixgbe_ring_f_enum {
+ RING_F_NONE = 0,
+ RING_F_VMDQ, /* SR-IOV uses the same ring feature */
+ RING_F_RSS,
+ RING_F_FDIR,
+#ifdef IXGBE_FCOE
+ RING_F_FCOE,
+#endif /* IXGBE_FCOE */
+ RING_F_ARRAY_SIZE /* must be last in enum set */
+};
+
+#define IXGBE_MAX_DCB_INDICES 8
+#define IXGBE_MAX_RSS_INDICES 16
+#define IXGBE_MAX_VMDQ_INDICES 64
+#define IXGBE_MAX_FDIR_INDICES 64
+#ifdef IXGBE_FCOE
+#define IXGBE_MAX_FCOE_INDICES 8
+#define MAX_RX_QUEUES (IXGBE_MAX_FDIR_INDICES + IXGBE_MAX_FCOE_INDICES)
+#define MAX_TX_QUEUES (IXGBE_MAX_FDIR_INDICES + IXGBE_MAX_FCOE_INDICES)
+#else
+#define MAX_RX_QUEUES IXGBE_MAX_FDIR_INDICES
+#define MAX_TX_QUEUES IXGBE_MAX_FDIR_INDICES
+#endif /* IXGBE_FCOE */
+struct ixgbe_ring_feature {
+ int indices;
+ int mask;
+};
+
+#ifndef CONFIG_IXGBE_DISABLE_PACKET_SPLIT
+/*
+ * FCoE requires that all Rx buffers be over 2200 bytes in length. Since
+ * this is twice the size of a half page we need to double the page order
+ * for FCoE enabled Rx queues.
+ */
+#if defined(IXGBE_FCOE) && (PAGE_SIZE < 8192)
+static inline unsigned int ixgbe_rx_pg_order(struct ixgbe_ring *ring)
+{
+ return test_bit(__IXGBE_RX_FCOE_BUFSZ, &ring->state) ? 1 : 0;
+}
+#else
+#define ixgbe_rx_pg_order(_ring) 0
+#endif
+#define ixgbe_rx_pg_size(_ring) (PAGE_SIZE << ixgbe_rx_pg_order(_ring))
+#define ixgbe_rx_bufsz(_ring) ((PAGE_SIZE / 2) << ixgbe_rx_pg_order(_ring))
+
+#endif
+struct ixgbe_ring_container {
+ struct ixgbe_ring *ring; /* pointer to linked list of rings */
+ unsigned int total_bytes; /* total bytes processed this int */
+ unsigned int total_packets; /* total packets processed this int */
+ u16 work_limit; /* total work allowed per interrupt */
+ u8 count; /* total number of rings in vector */
+ u8 itr; /* current ITR setting for ring */
+};
+
+/* iterator for handling rings in ring container */
+#define ixgbe_for_each_ring(pos, head) \
+ for (pos = (head).ring; pos != NULL; pos = pos->next)
+
+#define MAX_RX_PACKET_BUFFERS ((adapter->flags & IXGBE_FLAG_DCB_ENABLED) \
+ ? 8 : 1)
+#define MAX_TX_PACKET_BUFFERS MAX_RX_PACKET_BUFFERS
+
+/* MAX_MSIX_Q_VECTORS of these are allocated,
+ * but we only use one per queue-specific vector.
+ */
+struct ixgbe_q_vector {
+ struct ixgbe_adapter *adapter;
+ int cpu; /* CPU for DCA */
+ u16 v_idx; /* index of q_vector within array, also used for
+ * finding the bit in EICR and friends that
+ * represents the vector for this ring */
+ u16 itr; /* Interrupt throttle rate written to EITR */
+ struct ixgbe_ring_container rx, tx;
+
+#ifdef CONFIG_IXGBE_NAPI
+ struct napi_struct napi;
+#endif
+#ifndef HAVE_NETDEV_NAPI_LIST
+ struct net_device poll_dev;
+#endif
+#ifdef HAVE_IRQ_AFFINITY_HINT
+ cpumask_t affinity_mask;
+#endif
+#ifndef IXGBE_NO_LRO
+ struct ixgbe_lro_list lrolist; /* LRO list for queue vector*/
+#endif
+ int numa_node;
+ char name[IFNAMSIZ + 9];
+
+ /* for dynamic allocation of rings associated with this q_vector */
+ struct ixgbe_ring ring[0] ____cacheline_internodealigned_in_smp;
+};
+
+/*
+ * microsecond values for various ITR rates shifted by 2 to fit itr register
+ * with the first 3 bits reserved 0
+ */
+#define IXGBE_MIN_RSC_ITR 24
+#define IXGBE_100K_ITR 40
+#define IXGBE_20K_ITR 200
+#define IXGBE_16K_ITR 248
+#define IXGBE_10K_ITR 400
+#define IXGBE_8K_ITR 500
+
+/* ixgbe_test_staterr - tests bits in Rx descriptor status and error fields */
+static inline __le32 ixgbe_test_staterr(union ixgbe_adv_rx_desc *rx_desc,
+ const u32 stat_err_bits)
+{
+ return rx_desc->wb.upper.status_error & cpu_to_le32(stat_err_bits);
+}
+
+/* ixgbe_desc_unused - calculate if we have unused descriptors */
+static inline u16 ixgbe_desc_unused(struct ixgbe_ring *ring)
+{
+ u16 ntc = ring->next_to_clean;
+ u16 ntu = ring->next_to_use;
+
+ return ((ntc > ntu) ? 0 : ring->count) + ntc - ntu - 1;
+}
+
+#define IXGBE_RX_DESC(R, i) \
+ (&(((union ixgbe_adv_rx_desc *)((R)->desc))[i]))
+#define IXGBE_TX_DESC(R, i) \
+ (&(((union ixgbe_adv_tx_desc *)((R)->desc))[i]))
+#define IXGBE_TX_CTXTDESC(R, i) \
+ (&(((struct ixgbe_adv_tx_context_desc *)((R)->desc))[i]))
+
+#define IXGBE_MAX_JUMBO_FRAME_SIZE 16128
+#ifdef IXGBE_FCOE
+/* use 3K as the baby jumbo frame size for FCoE */
+#define IXGBE_FCOE_JUMBO_FRAME_SIZE 3072
+#endif /* IXGBE_FCOE */
+
+#define TCP_TIMER_VECTOR 0
+#define OTHER_VECTOR 1
+#define NON_Q_VECTORS (OTHER_VECTOR + TCP_TIMER_VECTOR)
+
+#define IXGBE_MAX_MSIX_Q_VECTORS_82599 64
+#define IXGBE_MAX_MSIX_Q_VECTORS_82598 16
+
+struct ixgbe_mac_addr {
+ u8 addr[ETH_ALEN];
+ u16 queue;
+ u16 state; /* bitmask */
+};
+#define IXGBE_MAC_STATE_DEFAULT 0x1
+#define IXGBE_MAC_STATE_MODIFIED 0x2
+#define IXGBE_MAC_STATE_IN_USE 0x4
+
+#ifdef IXGBE_PROCFS
+struct ixgbe_therm_proc_data {
+ struct ixgbe_hw *hw;
+ struct ixgbe_thermal_diode_data *sensor_data;
+};
+
+#endif /* IXGBE_PROCFS */
+
+/*
+ * Only for array allocations in our adapter struct. On 82598, there will be
+ * unused entries in the array, but that's not a big deal. Also, in 82599,
+ * we can actually assign 64 queue vectors based on our extended-extended
+ * interrupt registers. This is different than 82598, which is limited to 16.
+ */
+#define MAX_MSIX_Q_VECTORS IXGBE_MAX_MSIX_Q_VECTORS_82599
+#define MAX_MSIX_COUNT IXGBE_MAX_MSIX_VECTORS_82599
+
+#define MIN_MSIX_Q_VECTORS 1
+#define MIN_MSIX_COUNT (MIN_MSIX_Q_VECTORS + NON_Q_VECTORS)
+
+/* default to trying for four seconds */
+#define IXGBE_TRY_LINK_TIMEOUT (4 * HZ)
+
+/* board specific private data structure */
+struct ixgbe_adapter {
+#ifdef NETIF_F_HW_VLAN_TX
+#ifdef HAVE_VLAN_RX_REGISTER
+ struct vlan_group *vlgrp; /* must be first, see ixgbe_receive_skb */
+#else
+ unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
+#endif
+#endif /* NETIF_F_HW_VLAN_TX */
+ /* OS defined structs */
+ struct net_device *netdev;
+ struct pci_dev *pdev;
+
+ unsigned long state;
+
+ /* Some features need tri-state capability,
+ * thus the additional *_CAPABLE flags.
+ */
+ u32 flags;
+#define IXGBE_FLAG_MSI_CAPABLE (u32)(1 << 0)
+#define IXGBE_FLAG_MSI_ENABLED (u32)(1 << 1)
+#define IXGBE_FLAG_MSIX_CAPABLE (u32)(1 << 2)
+#define IXGBE_FLAG_MSIX_ENABLED (u32)(1 << 3)
+#ifndef IXGBE_NO_LLI
+#define IXGBE_FLAG_LLI_PUSH (u32)(1 << 4)
+#endif
+#define IXGBE_FLAG_IN_NETPOLL (u32)(1 << 8)
+#if defined(CONFIG_DCA) || defined(CONFIG_DCA_MODULE)
+#define IXGBE_FLAG_DCA_ENABLED (u32)(1 << 9)
+#define IXGBE_FLAG_DCA_CAPABLE (u32)(1 << 10)
+#define IXGBE_FLAG_DCA_ENABLED_DATA (u32)(1 << 11)
+#else
+#define IXGBE_FLAG_DCA_ENABLED (u32)0
+#define IXGBE_FLAG_DCA_CAPABLE (u32)0
+#define IXGBE_FLAG_DCA_ENABLED_DATA (u32)0
+#endif
+#define IXGBE_FLAG_MQ_CAPABLE (u32)(1 << 12)
+#define IXGBE_FLAG_DCB_ENABLED (u32)(1 << 13)
+#define IXGBE_FLAG_DCB_CAPABLE (u32)(1 << 14)
+#define IXGBE_FLAG_RSS_ENABLED (u32)(1 << 15)
+#define IXGBE_FLAG_RSS_CAPABLE (u32)(1 << 16)
+#define IXGBE_FLAG_VMDQ_ENABLED (u32)(1 << 18)
+#define IXGBE_FLAG_FAN_FAIL_CAPABLE (u32)(1 << 19)
+#define IXGBE_FLAG_NEED_LINK_UPDATE (u32)(1 << 20)
+#define IXGBE_FLAG_NEED_LINK_CONFIG (u32)(1 << 21)
+#define IXGBE_FLAG_FDIR_HASH_CAPABLE (u32)(1 << 22)
+#define IXGBE_FLAG_FDIR_PERFECT_CAPABLE (u32)(1 << 23)
+#ifdef IXGBE_FCOE
+#define IXGBE_FLAG_FCOE_CAPABLE (u32)(1 << 24)
+#define IXGBE_FLAG_FCOE_ENABLED (u32)(1 << 25)
+#endif /* IXGBE_FCOE */
+#define IXGBE_FLAG_SRIOV_CAPABLE (u32)(1 << 26)
+#define IXGBE_FLAG_SRIOV_ENABLED (u32)(1 << 27)
+#define IXGBE_FLAG_SRIOV_REPLICATION_ENABLE (u32)(1 << 28)
+#define IXGBE_FLAG_SRIOV_L2SWITCH_ENABLE (u32)(1 << 29)
+#define IXGBE_FLAG_SRIOV_L2LOOPBACK_ENABLE (u32)(1 << 30)
+#define IXGBE_FLAG_RX_BB_CAPABLE (u32)(1 << 31)
+
+ u32 flags2;
+#ifndef IXGBE_NO_HW_RSC
+#define IXGBE_FLAG2_RSC_CAPABLE (u32)(1)
+#define IXGBE_FLAG2_RSC_ENABLED (u32)(1 << 1)
+#else
+#define IXGBE_FLAG2_RSC_CAPABLE 0
+#define IXGBE_FLAG2_RSC_ENABLED 0
+#endif
+#define IXGBE_FLAG2_VMDQ_DEFAULT_OVERRIDE (u32)(1 << 2)
+#define IXGBE_FLAG2_TEMP_SENSOR_CAPABLE (u32)(1 << 4)
+#define IXGBE_FLAG2_TEMP_SENSOR_EVENT (u32)(1 << 5)
+#define IXGBE_FLAG2_SEARCH_FOR_SFP (u32)(1 << 6)
+#define IXGBE_FLAG2_SFP_NEEDS_RESET (u32)(1 << 7)
+#define IXGBE_FLAG2_RESET_REQUESTED (u32)(1 << 8)
+#define IXGBE_FLAG2_FDIR_REQUIRES_REINIT (u32)(1 << 9)
+#define IXGBE_FLAG2_RSS_FIELD_IPV4_UDP (u32)(1 << 10)
+#define IXGBE_FLAG2_RSS_FIELD_IPV6_UDP (u32)(1 << 11)
+#define IXGBE_FLAG2_OVERFLOW_CHECK_ENABLED (u32)(1 << 12)
+
+ /* Tx fast path data */
+ int num_tx_queues;
+ u16 tx_itr_setting;
+ u16 tx_work_limit;
+
+ /* Rx fast path data */
+ int num_rx_queues;
+ u16 rx_itr_setting;
+ u16 rx_work_limit;
+
+ /* TX */
+ struct ixgbe_ring *tx_ring[MAX_TX_QUEUES] ____cacheline_aligned_in_smp;
+
+ u64 restart_queue;
+ u64 lsc_int;
+ u32 tx_timeout_count;
+
+ /* RX */
+ struct ixgbe_ring *rx_ring[MAX_RX_QUEUES];
+ int num_rx_pools; /* == num_rx_queues in 82598 */
+ int num_rx_queues_per_pool; /* 1 if 82598, can be many if 82599 */
+ u64 hw_csum_rx_error;
+ u64 hw_rx_no_dma_resources;
+ u64 rsc_total_count;
+ u64 rsc_total_flush;
+ u64 non_eop_descs;
+#ifndef CONFIG_IXGBE_NAPI
+ u64 rx_dropped_backlog; /* count drops from rx intr handler */
+#endif
+ u32 alloc_rx_page_failed;
+ u32 alloc_rx_buff_failed;
+
+ struct ixgbe_q_vector *q_vector[MAX_MSIX_Q_VECTORS];
+
+#ifdef HAVE_DCBNL_IEEE
+ struct ieee_pfc *ixgbe_ieee_pfc;
+ struct ieee_ets *ixgbe_ieee_ets;
+#endif
+ struct ixgbe_dcb_config dcb_cfg;
+ struct ixgbe_dcb_config temp_dcb_cfg;
+ u8 dcb_set_bitmap;
+ u8 dcbx_cap;
+#ifndef HAVE_MQPRIO
+ u8 tc;
+#endif
+ enum ixgbe_fc_mode last_lfc_mode;
+
+ int num_msix_vectors;
+ int max_msix_q_vectors; /* true count of q_vectors for device */
+ struct ixgbe_ring_feature ring_feature[RING_F_ARRAY_SIZE];
+ struct msix_entry *msix_entries;
+
+#ifndef HAVE_NETDEV_STATS_IN_NETDEV
+ struct net_device_stats net_stats;
+#endif
+#ifndef IXGBE_NO_LRO
+ struct ixgbe_lro_stats lro_stats;
+#endif
+
+#ifdef ETHTOOL_TEST
+ u32 test_icr;
+ struct ixgbe_ring test_tx_ring;
+ struct ixgbe_ring test_rx_ring;
+#endif
+
+ /* structs defined in ixgbe_hw.h */
+ struct ixgbe_hw hw;
+ u16 msg_enable;
+ struct ixgbe_hw_stats stats;
+#ifndef IXGBE_NO_LLI
+ u32 lli_port;
+ u32 lli_size;
+ u32 lli_etype;
+ u32 lli_vlan_pri;
+#endif /* IXGBE_NO_LLI */
+
+ u32 *config_space;
+ u64 tx_busy;
+ unsigned int tx_ring_count;
+ unsigned int rx_ring_count;
+
+ u32 link_speed;
+ bool link_up;
+ unsigned long link_check_timeout;
+
+ struct timer_list service_timer;
+ struct work_struct service_task;
+
+ struct hlist_head fdir_filter_list;
+ unsigned long fdir_overflow; /* number of times ATR was backed off */
+ union ixgbe_atr_input fdir_mask;
+ int fdir_filter_count;
+ u32 fdir_pballoc;
+ u32 atr_sample_rate;
+ spinlock_t fdir_perfect_lock;
+
+#ifdef IXGBE_FCOE
+ struct ixgbe_fcoe fcoe;
+#endif /* IXGBE_FCOE */
+ u32 wol;
+
+ u16 bd_number;
+
+ char eeprom_id[32];
+ u16 eeprom_cap;
+ bool netdev_registered;
+ u32 interrupt_event;
+#ifdef HAVE_ETHTOOL_SET_PHYS_ID
+ u32 led_reg;
+#endif
+
+ DECLARE_BITMAP(active_vfs, IXGBE_MAX_VF_FUNCTIONS);
+ unsigned int num_vfs;
+ struct vf_data_storage *vfinfo;
+ int vf_rate_link_speed;
+ struct vf_macvlans vf_mvs;
+ struct vf_macvlans *mv_list;
+#ifdef CONFIG_PCI_IOV
+ u32 timer_event_accumulator;
+ u32 vferr_refcount;
+#endif
+ struct ixgbe_mac_addr *mac_table;
+#ifdef IXGBE_SYSFS
+ struct kobject *info_kobj;
+ struct kobject *therm_kobj[IXGBE_MAX_SENSORS];
+#else /* IXGBE_SYSFS */
+#ifdef IXGBE_PROCFS
+ struct proc_dir_entry *eth_dir;
+ struct proc_dir_entry *info_dir;
+ struct proc_dir_entry *therm_dir[IXGBE_MAX_SENSORS];
+ struct ixgbe_therm_proc_data therm_data[IXGBE_MAX_SENSORS];
+#endif /* IXGBE_PROCFS */
+#endif /* IXGBE_SYSFS */
+};
+
+struct ixgbe_fdir_filter {
+ struct hlist_node fdir_node;
+ union ixgbe_atr_input filter;
+ u16 sw_idx;
+ u16 action;
+};
+
+enum ixgbe_state_t {
+ __IXGBE_TESTING,
+ __IXGBE_RESETTING,
+ __IXGBE_DOWN,
+ __IXGBE_SERVICE_SCHED,
+ __IXGBE_IN_SFP_INIT,
+};
+
+struct ixgbe_cb {
+#ifdef CONFIG_IXGBE_DISABLE_PACKET_SPLIT
+ union { /* Union defining head/tail partner */
+ struct sk_buff *head;
+ struct sk_buff *tail;
+ };
+#endif
+ dma_addr_t dma;
+#ifndef IXGBE_NO_LRO
+ __be32 tsecr; /* timestamp echo response */
+ u32 tsval; /* timestamp value in host order */
+ u32 next_seq; /* next expected sequence number */
+ u16 free; /* 65521 minus total size */
+ u16 mss; /* size of data portion of packet */
+#endif /* IXGBE_NO_LRO */
+#ifdef HAVE_VLAN_RX_REGISTER
+ u16 vid; /* VLAN tag */
+#endif
+ u16 append_cnt; /* number of skb's appended */
+#ifndef CONFIG_IXGBE_DISABLE_PACKET_SPLIT
+ bool page_released;
+#endif
+};
+#define IXGBE_CB(skb) ((struct ixgbe_cb *)(skb)->cb)
+
+#ifdef IXGBE_SYSFS
+void ixgbe_sysfs_exit(struct ixgbe_adapter *adapter);
+int ixgbe_sysfs_init(struct ixgbe_adapter *adapter);
+#endif /* IXGBE_SYSFS */
+#ifdef IXGBE_PROCFS
+void ixgbe_procfs_exit(struct ixgbe_adapter *adapter);
+int ixgbe_procfs_init(struct ixgbe_adapter *adapter);
+int ixgbe_procfs_topdir_init(void);
+void ixgbe_procfs_topdir_exit(void);
+#endif /* IXGBE_PROCFS */
+
+extern struct dcbnl_rtnl_ops dcbnl_ops;
+extern int ixgbe_copy_dcb_cfg(struct ixgbe_adapter *adapter, int tc_max);
+
+extern u8 ixgbe_dcb_txq_to_tc(struct ixgbe_adapter *adapter, u8 index);
+
+/* needed by ixgbe_main.c */
+extern int ixgbe_validate_mac_addr(u8 *mc_addr);
+extern void ixgbe_check_options(struct ixgbe_adapter *adapter);
+extern void ixgbe_assign_netdev_ops(struct net_device *netdev);
+
+/* needed by ixgbe_ethtool.c */
+extern char ixgbe_driver_name[];
+extern const char ixgbe_driver_version[];
+
+extern void ixgbe_up(struct ixgbe_adapter *adapter);
+extern void ixgbe_down(struct ixgbe_adapter *adapter);
+extern void ixgbe_reinit_locked(struct ixgbe_adapter *adapter);
+extern void ixgbe_reset(struct ixgbe_adapter *adapter);
+extern void ixgbe_set_ethtool_ops(struct net_device *netdev);
+extern int ixgbe_setup_rx_resources(struct ixgbe_ring *);
+extern int ixgbe_setup_tx_resources(struct ixgbe_ring *);
+extern void ixgbe_free_rx_resources(struct ixgbe_ring *);
+extern void ixgbe_free_tx_resources(struct ixgbe_ring *);
+extern void ixgbe_configure_rx_ring(struct ixgbe_adapter *,
+ struct ixgbe_ring *);
+extern void ixgbe_configure_tx_ring(struct ixgbe_adapter *,
+ struct ixgbe_ring *);
+extern void ixgbe_update_stats(struct ixgbe_adapter *adapter);
+extern int ixgbe_init_interrupt_scheme(struct ixgbe_adapter *adapter);
+extern void ixgbe_clear_interrupt_scheme(struct ixgbe_adapter *adapter);
+extern bool ixgbe_is_ixgbe(struct pci_dev *pcidev);
+extern netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *,
+ struct ixgbe_adapter *,
+ struct ixgbe_ring *);
+extern void ixgbe_unmap_and_free_tx_resource(struct ixgbe_ring *,
+ struct ixgbe_tx_buffer *);
+extern void ixgbe_alloc_rx_buffers(struct ixgbe_ring *, u16);
+extern void ixgbe_configure_rscctl(struct ixgbe_adapter *adapter,
+ struct ixgbe_ring *);
+extern void ixgbe_clear_rscctl(struct ixgbe_adapter *adapter,
+ struct ixgbe_ring *);
+extern void ixgbe_set_rx_mode(struct net_device *netdev);
+extern int ixgbe_write_mc_addr_list(struct net_device *netdev);
+extern int ixgbe_setup_tc(struct net_device *dev, u8 tc);
+#ifdef IXGBE_FCOE
+extern void ixgbe_tx_ctxtdesc(struct ixgbe_ring *, u32, u32, u32, u32);
+#endif /* IXGBE_FCOE */
+extern void ixgbe_do_reset(struct net_device *netdev);
+extern void ixgbe_write_eitr(struct ixgbe_q_vector *q_vector);
+extern void ixgbe_disable_rx_queue(struct ixgbe_adapter *adapter,
+ struct ixgbe_ring *);
+extern void ixgbe_vlan_stripping_enable(struct ixgbe_adapter *adapter);
+extern void ixgbe_vlan_stripping_disable(struct ixgbe_adapter *adapter);
+#ifdef ETHTOOL_OPS_COMPAT
+extern int ethtool_ioctl(struct ifreq *ifr);
+#endif
+
+#ifdef IXGBE_FCOE
+extern void ixgbe_configure_fcoe(struct ixgbe_adapter *adapter);
+extern int ixgbe_fso(struct ixgbe_ring *tx_ring,
+ struct ixgbe_tx_buffer *first,
+ u8 *hdr_len);
+extern void ixgbe_cleanup_fcoe(struct ixgbe_adapter *adapter);
+extern int ixgbe_fcoe_ddp(struct ixgbe_adapter *adapter,
+ union ixgbe_adv_rx_desc *rx_desc,
+ struct sk_buff *skb);
+extern int ixgbe_fcoe_ddp_get(struct net_device *netdev, u16 xid,
+ struct scatterlist *sgl, unsigned int sgc);
+#ifdef HAVE_NETDEV_OPS_FCOE_DDP_TARGET
+extern int ixgbe_fcoe_ddp_target(struct net_device *netdev, u16 xid,
+ struct scatterlist *sgl, unsigned int sgc);
+#endif /* HAVE_NETDEV_OPS_FCOE_DDP_TARGET */
+extern int ixgbe_fcoe_ddp_put(struct net_device *netdev, u16 xid);
+#ifdef HAVE_NETDEV_OPS_FCOE_ENABLE
+extern int ixgbe_fcoe_enable(struct net_device *netdev);
+extern int ixgbe_fcoe_disable(struct net_device *netdev);
+#endif /* HAVE_NETDEV_OPS_FCOE_ENABLE */
+#ifdef CONFIG_DCB
+#ifdef HAVE_DCBNL_OPS_GETAPP
+extern u8 ixgbe_fcoe_getapp(struct net_device *netdev);
+#endif /* HAVE_DCBNL_OPS_GETAPP */
+extern u8 ixgbe_fcoe_setapp(struct ixgbe_adapter *adapter, u8 up);
+#endif /* CONFIG_DCB */
+#ifdef HAVE_NETDEV_OPS_FCOE_GETWWN
+extern int ixgbe_fcoe_get_wwn(struct net_device *netdev, u64 *wwn, int type);
+#endif
+#endif /* IXGBE_FCOE */
+
+#ifdef CONFIG_DCB
+#ifdef HAVE_DCBNL_IEEE
+s32 ixgbe_dcb_hw_ets(struct ixgbe_hw *hw, struct ieee_ets *ets, int max_frame);
+#endif /* HAVE_DCBNL_IEEE */
+#endif /* CONFIG_DCB */
+
+extern void ixgbe_clean_rx_ring(struct ixgbe_ring *rx_ring);
+extern int ixgbe_get_settings(struct net_device *netdev,
+ struct ethtool_cmd *ecmd);
+extern int ixgbe_write_uc_addr_list(struct ixgbe_adapter *adapter,
+ struct net_device *netdev, unsigned int vfn);
+extern void ixgbe_full_sync_mac_table(struct ixgbe_adapter *adapter);
+extern int ixgbe_add_mac_filter(struct ixgbe_adapter *adapter,
+ u8 *addr, u16 queue);
+extern int ixgbe_del_mac_filter(struct ixgbe_adapter *adapter,
+ u8 *addr, u16 queue);
+extern int ixgbe_available_rars(struct ixgbe_adapter *adapter);
+#ifndef HAVE_VLAN_RX_REGISTER
+extern void ixgbe_vlan_mode(struct net_device *, u32);
+#endif
+#ifndef ixgbe_get_netdev_tc_txq
+#define ixgbe_get_netdev_tc_txq(dev, tc) (&dev->tc_to_txq[tc])
+#endif
+extern void ixgbe_set_rx_drop_en(struct ixgbe_adapter *adapter);
+#endif /* _IXGBE_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.c
new file mode 100644
index 00000000..24015844
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.c
@@ -0,0 +1,1296 @@
+/*******************************************************************************
+
+ Intel 10 Gigabit PCI Express Linux driver
+ Copyright(c) 1999 - 2012 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#include "ixgbe_type.h"
+#include "ixgbe_82598.h"
+#include "ixgbe_api.h"
+#include "ixgbe_common.h"
+#include "ixgbe_phy.h"
+
+static s32 ixgbe_get_link_capabilities_82598(struct ixgbe_hw *hw,
+ ixgbe_link_speed *speed,
+ bool *autoneg);
+static enum ixgbe_media_type ixgbe_get_media_type_82598(struct ixgbe_hw *hw);
+static s32 ixgbe_start_mac_link_82598(struct ixgbe_hw *hw,
+ bool autoneg_wait_to_complete);
+static s32 ixgbe_check_mac_link_82598(struct ixgbe_hw *hw,
+ ixgbe_link_speed *speed, bool *link_up,
+ bool link_up_wait_to_complete);
+static s32 ixgbe_setup_mac_link_82598(struct ixgbe_hw *hw,
+ ixgbe_link_speed speed,
+ bool autoneg,
+ bool autoneg_wait_to_complete);
+static s32 ixgbe_setup_copper_link_82598(struct ixgbe_hw *hw,
+ ixgbe_link_speed speed,
+ bool autoneg,
+ bool autoneg_wait_to_complete);
+static s32 ixgbe_reset_hw_82598(struct ixgbe_hw *hw);
+static s32 ixgbe_clear_vmdq_82598(struct ixgbe_hw *hw, u32 rar, u32 vmdq);
+static s32 ixgbe_clear_vfta_82598(struct ixgbe_hw *hw);
+static void ixgbe_set_rxpba_82598(struct ixgbe_hw *hw, int num_pb,
+ u32 headroom, int strategy);
+
+/**
+ * ixgbe_set_pcie_completion_timeout - set pci-e completion timeout
+ * @hw: pointer to the HW structure
+ *
+ * The defaults for 82598 should be in the range of 50us to 50ms,
+ * however the hardware default for these parts is 500us to 1ms which is less
+ * than the 10ms recommended by the pci-e spec. To address this we need to
+ * increase the value to either 10ms to 250ms for capability version 1 config,
+ * or 16ms to 55ms for version 2.
+ **/
+void ixgbe_set_pcie_completion_timeout(struct ixgbe_hw *hw)
+{
+ u32 gcr = IXGBE_READ_REG(hw, IXGBE_GCR);
+ u16 pcie_devctl2;
+
+ /* only take action if timeout value is defaulted to 0 */
+ if (gcr & IXGBE_GCR_CMPL_TMOUT_MASK)
+ goto out;
+
+ /*
+ * if capababilities version is type 1 we can write the
+ * timeout of 10ms to 250ms through the GCR register
+ */
+ if (!(gcr & IXGBE_GCR_CAP_VER2)) {
+ gcr |= IXGBE_GCR_CMPL_TMOUT_10ms;
+ goto out;
+ }
+
+ /*
+ * for version 2 capabilities we need to write the config space
+ * directly in order to set the completion timeout value for
+ * 16ms to 55ms
+ */
+ pcie_devctl2 = IXGBE_READ_PCIE_WORD(hw, IXGBE_PCI_DEVICE_CONTROL2);
+ pcie_devctl2 |= IXGBE_PCI_DEVICE_CONTROL2_16ms;
+ IXGBE_WRITE_PCIE_WORD(hw, IXGBE_PCI_DEVICE_CONTROL2, pcie_devctl2);
+out:
+ /* disable completion timeout resend */
+ gcr &= ~IXGBE_GCR_CMPL_TMOUT_RESEND;
+ IXGBE_WRITE_REG(hw, IXGBE_GCR, gcr);
+}
+
+/**
+ * ixgbe_init_ops_82598 - Inits func ptrs and MAC type
+ * @hw: pointer to hardware structure
+ *
+ * Initialize the function pointers and assign the MAC type for 82598.
+ * Does not touch the hardware.
+ **/
+s32 ixgbe_init_ops_82598(struct ixgbe_hw *hw)
+{
+ struct ixgbe_mac_info *mac = &hw->mac;
+ struct ixgbe_phy_info *phy = &hw->phy;
+ s32 ret_val;
+
+ ret_val = ixgbe_init_phy_ops_generic(hw);
+ ret_val = ixgbe_init_ops_generic(hw);
+
+ /* PHY */
+ phy->ops.init = &ixgbe_init_phy_ops_82598;
+
+ /* MAC */
+ mac->ops.start_hw = &ixgbe_start_hw_82598;
+ mac->ops.reset_hw = &ixgbe_reset_hw_82598;
+ mac->ops.get_media_type = &ixgbe_get_media_type_82598;
+ mac->ops.get_supported_physical_layer =
+ &ixgbe_get_supported_physical_layer_82598;
+ mac->ops.read_analog_reg8 = &ixgbe_read_analog_reg8_82598;
+ mac->ops.write_analog_reg8 = &ixgbe_write_analog_reg8_82598;
+ mac->ops.set_lan_id = &ixgbe_set_lan_id_multi_port_pcie_82598;
+
+ /* RAR, Multicast, VLAN */
+ mac->ops.set_vmdq = &ixgbe_set_vmdq_82598;
+ mac->ops.clear_vmdq = &ixgbe_clear_vmdq_82598;
+ mac->ops.set_vfta = &ixgbe_set_vfta_82598;
+ mac->ops.set_vlvf = NULL;
+ mac->ops.clear_vfta = &ixgbe_clear_vfta_82598;
+
+ /* Flow Control */
+ mac->ops.fc_enable = &ixgbe_fc_enable_82598;
+
+ mac->mcft_size = 128;
+ mac->vft_size = 128;
+ mac->num_rar_entries = 16;
+ mac->rx_pb_size = 512;
+ mac->max_tx_queues = 32;
+ mac->max_rx_queues = 64;
+ mac->max_msix_vectors = ixgbe_get_pcie_msix_count_generic(hw);
+
+ /* SFP+ Module */
+ phy->ops.read_i2c_eeprom = &ixgbe_read_i2c_eeprom_82598;
+
+ /* Link */
+ mac->ops.check_link = &ixgbe_check_mac_link_82598;
+ mac->ops.setup_link = &ixgbe_setup_mac_link_82598;
+ mac->ops.flap_tx_laser = NULL;
+ mac->ops.get_link_capabilities = &ixgbe_get_link_capabilities_82598;
+ mac->ops.setup_rxpba = &ixgbe_set_rxpba_82598;
+
+ /* Manageability interface */
+ mac->ops.set_fw_drv_ver = NULL;
+
+ return ret_val;
+}
+
+/**
+ * ixgbe_init_phy_ops_82598 - PHY/SFP specific init
+ * @hw: pointer to hardware structure
+ *
+ * Initialize any function pointers that were not able to be
+ * set during init_shared_code because the PHY/SFP type was
+ * not known. Perform the SFP init if necessary.
+ *
+ **/
+s32 ixgbe_init_phy_ops_82598(struct ixgbe_hw *hw)
+{
+ struct ixgbe_mac_info *mac = &hw->mac;
+ struct ixgbe_phy_info *phy = &hw->phy;
+ s32 ret_val = 0;
+ u16 list_offset, data_offset;
+
+ /* Identify the PHY */
+ phy->ops.identify(hw);
+
+ /* Overwrite the link function pointers if copper PHY */
+ if (mac->ops.get_media_type(hw) == ixgbe_media_type_copper) {
+ mac->ops.setup_link = &ixgbe_setup_copper_link_82598;
+ mac->ops.get_link_capabilities =
+ &ixgbe_get_copper_link_capabilities_generic;
+ }
+
+ switch (hw->phy.type) {
+ case ixgbe_phy_tn:
+ phy->ops.setup_link = &ixgbe_setup_phy_link_tnx;
+ phy->ops.check_link = &ixgbe_check_phy_link_tnx;
+ phy->ops.get_firmware_version =
+ &ixgbe_get_phy_firmware_version_tnx;
+ break;
+ case ixgbe_phy_nl:
+ phy->ops.reset = &ixgbe_reset_phy_nl;
+
+ /* Call SFP+ identify routine to get the SFP+ module type */
+ ret_val = phy->ops.identify_sfp(hw);
+ if (ret_val != 0)
+ goto out;
+ else if (hw->phy.sfp_type == ixgbe_sfp_type_unknown) {
+ ret_val = IXGBE_ERR_SFP_NOT_SUPPORTED;
+ goto out;
+ }
+
+ /* Check to see if SFP+ module is supported */
+ ret_val = ixgbe_get_sfp_init_sequence_offsets(hw,
+ &list_offset,
+ &data_offset);
+ if (ret_val != 0) {
+ ret_val = IXGBE_ERR_SFP_NOT_SUPPORTED;
+ goto out;
+ }
+ break;
+ default:
+ break;
+ }
+
+out:
+ return ret_val;
+}
+
+/**
+ * ixgbe_start_hw_82598 - Prepare hardware for Tx/Rx
+ * @hw: pointer to hardware structure
+ *
+ * Starts the hardware using the generic start_hw function.
+ * Disables relaxed ordering Then set pcie completion timeout
+ *
+ **/
+s32 ixgbe_start_hw_82598(struct ixgbe_hw *hw)
+{
+ u32 regval;
+ u32 i;
+ s32 ret_val = 0;
+
+ ret_val = ixgbe_start_hw_generic(hw);
+
+ /* Disable relaxed ordering */
+ for (i = 0; ((i < hw->mac.max_tx_queues) &&
+ (i < IXGBE_DCA_MAX_QUEUES_82598)); i++) {
+ regval = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
+ regval &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
+ IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), regval);
+ }
+
+ for (i = 0; ((i < hw->mac.max_rx_queues) &&
+ (i < IXGBE_DCA_MAX_QUEUES_82598)); i++) {
+ regval = IXGBE_READ_REG(hw, IXGBE_DCA_RXCTRL(i));
+ regval &= ~(IXGBE_DCA_RXCTRL_DATA_WRO_EN |
+ IXGBE_DCA_RXCTRL_HEAD_WRO_EN);
+ IXGBE_WRITE_REG(hw, IXGBE_DCA_RXCTRL(i), regval);
+ }
+
+ /* set the completion timeout for interface */
+ if (ret_val == 0)
+ ixgbe_set_pcie_completion_timeout(hw);
+
+ return ret_val;
+}
+
+/**
+ * ixgbe_get_link_capabilities_82598 - Determines link capabilities
+ * @hw: pointer to hardware structure
+ * @speed: pointer to link speed
+ * @autoneg: boolean auto-negotiation value
+ *
+ * Determines the link capabilities by reading the AUTOC register.
+ **/
+static s32 ixgbe_get_link_capabilities_82598(struct ixgbe_hw *hw,
+ ixgbe_link_speed *speed,
+ bool *autoneg)
+{
+ s32 status = 0;
+ u32 autoc = 0;
+
+ /*
+ * Determine link capabilities based on the stored value of AUTOC,
+ * which represents EEPROM defaults. If AUTOC value has not been
+ * stored, use the current register value.
+ */
+ if (hw->mac.orig_link_settings_stored)
+ autoc = hw->mac.orig_autoc;
+ else
+ autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC);
+
+ switch (autoc & IXGBE_AUTOC_LMS_MASK) {
+ case IXGBE_AUTOC_LMS_1G_LINK_NO_AN:
+ *speed = IXGBE_LINK_SPEED_1GB_FULL;
+ *autoneg = false;
+ break;
+
+ case IXGBE_AUTOC_LMS_10G_LINK_NO_AN:
+ *speed = IXGBE_LINK_SPEED_10GB_FULL;
+ *autoneg = false;
+ break;
+
+ case IXGBE_AUTOC_LMS_1G_AN:
+ *speed = IXGBE_LINK_SPEED_1GB_FULL;
+ *autoneg = true;
+ break;
+
+ case IXGBE_AUTOC_LMS_KX4_AN:
+ case IXGBE_AUTOC_LMS_KX4_AN_1G_AN:
+ *speed = IXGBE_LINK_SPEED_UNKNOWN;
+ if (autoc & IXGBE_AUTOC_KX4_SUPP)
+ *speed |= IXGBE_LINK_SPEED_10GB_FULL;
+ if (autoc & IXGBE_AUTOC_KX_SUPP)
+ *speed |= IXGBE_LINK_SPEED_1GB_FULL;
+ *autoneg = true;
+ break;
+
+ default:
+ status = IXGBE_ERR_LINK_SETUP;
+ break;
+ }
+
+ return status;
+}
+
+/**
+ * ixgbe_get_media_type_82598 - Determines media type
+ * @hw: pointer to hardware structure
+ *
+ * Returns the media type (fiber, copper, backplane)
+ **/
+static enum ixgbe_media_type ixgbe_get_media_type_82598(struct ixgbe_hw *hw)
+{
+ enum ixgbe_media_type media_type;
+
+ /* Detect if there is a copper PHY attached. */
+ switch (hw->phy.type) {
+ case ixgbe_phy_cu_unknown:
+ case ixgbe_phy_tn:
+ media_type = ixgbe_media_type_copper;
+ goto out;
+ default:
+ break;
+ }
+
+ /* Media type for I82598 is based on device ID */
+ switch (hw->device_id) {
+ case IXGBE_DEV_ID_82598:
+ case IXGBE_DEV_ID_82598_BX:
+ /* Default device ID is mezzanine card KX/KX4 */
+ media_type = ixgbe_media_type_backplane;
+ break;
+ case IXGBE_DEV_ID_82598AF_DUAL_PORT:
+ case IXGBE_DEV_ID_82598AF_SINGLE_PORT:
+ case IXGBE_DEV_ID_82598_DA_DUAL_PORT:
+ case IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM:
+ case IXGBE_DEV_ID_82598EB_XF_LR:
+ case IXGBE_DEV_ID_82598EB_SFP_LOM:
+ media_type = ixgbe_media_type_fiber;
+ break;
+ case IXGBE_DEV_ID_82598EB_CX4:
+ case IXGBE_DEV_ID_82598_CX4_DUAL_PORT:
+ media_type = ixgbe_media_type_cx4;
+ break;
+ case IXGBE_DEV_ID_82598AT:
+ case IXGBE_DEV_ID_82598AT2:
+ media_type = ixgbe_media_type_copper;
+ break;
+ default:
+ media_type = ixgbe_media_type_unknown;
+ break;
+ }
+out:
+ return media_type;
+}
+
+/**
+ * ixgbe_fc_enable_82598 - Enable flow control
+ * @hw: pointer to hardware structure
+ *
+ * Enable flow control according to the current settings.
+ **/
+s32 ixgbe_fc_enable_82598(struct ixgbe_hw *hw)
+{
+ s32 ret_val = 0;
+ u32 fctrl_reg;
+ u32 rmcs_reg;
+ u32 reg;
+ u32 fcrtl, fcrth;
+ u32 link_speed = 0;
+ int i;
+ bool link_up;
+
+ /* Validate the water mark configuration */
+ if (!hw->fc.pause_time) {
+ ret_val = IXGBE_ERR_INVALID_LINK_SETTINGS;
+ goto out;
+ }
+
+ /* Low water mark of zero causes XOFF floods */
+ for (i = 0; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
+ if ((hw->fc.current_mode & ixgbe_fc_tx_pause) &&
+ hw->fc.high_water[i]) {
+ if (!hw->fc.low_water[i] ||
+ hw->fc.low_water[i] >= hw->fc.high_water[i]) {
+ hw_dbg(hw, "Invalid water mark configuration\n");
+ ret_val = IXGBE_ERR_INVALID_LINK_SETTINGS;
+ goto out;
+ }
+ }
+ }
+
+ /*
+ * On 82598 having Rx FC on causes resets while doing 1G
+ * so if it's on turn it off once we know link_speed. For
+ * more details see 82598 Specification update.
+ */
+ hw->mac.ops.check_link(hw, &link_speed, &link_up, false);
+ if (link_up && link_speed == IXGBE_LINK_SPEED_1GB_FULL) {
+ switch (hw->fc.requested_mode) {
+ case ixgbe_fc_full:
+ hw->fc.requested_mode = ixgbe_fc_tx_pause;
+ break;
+ case ixgbe_fc_rx_pause:
+ hw->fc.requested_mode = ixgbe_fc_none;
+ break;
+ default:
+ /* no change */
+ break;
+ }
+ }
+
+ /* Negotiate the fc mode to use */
+ ixgbe_fc_autoneg(hw);
+
+ /* Disable any previous flow control settings */
+ fctrl_reg = IXGBE_READ_REG(hw, IXGBE_FCTRL);
+ fctrl_reg &= ~(IXGBE_FCTRL_RFCE | IXGBE_FCTRL_RPFCE);
+
+ rmcs_reg = IXGBE_READ_REG(hw, IXGBE_RMCS);
+ rmcs_reg &= ~(IXGBE_RMCS_TFCE_PRIORITY | IXGBE_RMCS_TFCE_802_3X);
+
+ /*
+ * The possible values of fc.current_mode are:
+ * 0: Flow control is completely disabled
+ * 1: Rx flow control is enabled (we can receive pause frames,
+ * but not send pause frames).
+ * 2: Tx flow control is enabled (we can send pause frames but
+ * we do not support receiving pause frames).
+ * 3: Both Rx and Tx flow control (symmetric) are enabled.
+ * other: Invalid.
+ */
+ switch (hw->fc.current_mode) {
+ case ixgbe_fc_none:
+ /*
+ * Flow control is disabled by software override or autoneg.
+ * The code below will actually disable it in the HW.
+ */
+ break;
+ case ixgbe_fc_rx_pause:
+ /*
+ * Rx Flow control is enabled and Tx Flow control is
+ * disabled by software override. Since there really
+ * isn't a way to advertise that we are capable of RX
+ * Pause ONLY, we will advertise that we support both
+ * symmetric and asymmetric Rx PAUSE. Later, we will
+ * disable the adapter's ability to send PAUSE frames.
+ */
+ fctrl_reg |= IXGBE_FCTRL_RFCE;
+ break;
+ case ixgbe_fc_tx_pause:
+ /*
+ * Tx Flow control is enabled, and Rx Flow control is
+ * disabled by software override.
+ */
+ rmcs_reg |= IXGBE_RMCS_TFCE_802_3X;
+ break;
+ case ixgbe_fc_full:
+ /* Flow control (both Rx and Tx) is enabled by SW override. */
+ fctrl_reg |= IXGBE_FCTRL_RFCE;
+ rmcs_reg |= IXGBE_RMCS_TFCE_802_3X;
+ break;
+ default:
+ hw_dbg(hw, "Flow control param set incorrectly\n");
+ ret_val = IXGBE_ERR_CONFIG;
+ goto out;
+ break;
+ }
+
+ /* Set 802.3x based flow control settings. */
+ fctrl_reg |= IXGBE_FCTRL_DPF;
+ IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl_reg);
+ IXGBE_WRITE_REG(hw, IXGBE_RMCS, rmcs_reg);
+
+ /* Set up and enable Rx high/low water mark thresholds, enable XON. */
+ for (i = 0; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
+ if ((hw->fc.current_mode & ixgbe_fc_tx_pause) &&
+ hw->fc.high_water[i]) {
+ fcrtl = (hw->fc.low_water[i] << 10) | IXGBE_FCRTL_XONE;
+ fcrth = (hw->fc.high_water[i] << 10) | IXGBE_FCRTH_FCEN;
+ IXGBE_WRITE_REG(hw, IXGBE_FCRTL(i), fcrtl);
+ IXGBE_WRITE_REG(hw, IXGBE_FCRTH(i), fcrth);
+ } else {
+ IXGBE_WRITE_REG(hw, IXGBE_FCRTL(i), 0);
+ IXGBE_WRITE_REG(hw, IXGBE_FCRTH(i), 0);
+ }
+
+ }
+
+ /* Configure pause time (2 TCs per register) */
+ reg = hw->fc.pause_time * 0x00010001;
+ for (i = 0; i < (IXGBE_DCB_MAX_TRAFFIC_CLASS / 2); i++)
+ IXGBE_WRITE_REG(hw, IXGBE_FCTTV(i), reg);
+
+ /* Configure flow control refresh threshold value */
+ IXGBE_WRITE_REG(hw, IXGBE_FCRTV, hw->fc.pause_time / 2);
+
+out:
+ return ret_val;
+}
+
+/**
+ * ixgbe_start_mac_link_82598 - Configures MAC link settings
+ * @hw: pointer to hardware structure
+ *
+ * Configures link settings based on values in the ixgbe_hw struct.
+ * Restarts the link. Performs autonegotiation if needed.
+ **/
+static s32 ixgbe_start_mac_link_82598(struct ixgbe_hw *hw,
+ bool autoneg_wait_to_complete)
+{
+ u32 autoc_reg;
+ u32 links_reg;
+ u32 i;
+ s32 status = 0;
+
+ /* Restart link */
+ autoc_reg = IXGBE_READ_REG(hw, IXGBE_AUTOC);
+ autoc_reg |= IXGBE_AUTOC_AN_RESTART;
+ IXGBE_WRITE_REG(hw, IXGBE_AUTOC, autoc_reg);
+
+ /* Only poll for autoneg to complete if specified to do so */
+ if (autoneg_wait_to_complete) {
+ if ((autoc_reg & IXGBE_AUTOC_LMS_MASK) ==
+ IXGBE_AUTOC_LMS_KX4_AN ||
+ (autoc_reg & IXGBE_AUTOC_LMS_MASK) ==
+ IXGBE_AUTOC_LMS_KX4_AN_1G_AN) {
+ links_reg = 0; /* Just in case Autoneg time = 0 */
+ for (i = 0; i < IXGBE_AUTO_NEG_TIME; i++) {
+ links_reg = IXGBE_READ_REG(hw, IXGBE_LINKS);
+ if (links_reg & IXGBE_LINKS_KX_AN_COMP)
+ break;
+ msleep(100);
+ }
+ if (!(links_reg & IXGBE_LINKS_KX_AN_COMP)) {
+ status = IXGBE_ERR_AUTONEG_NOT_COMPLETE;
+ hw_dbg(hw, "Autonegotiation did not complete.\n");
+ }
+ }
+ }
+
+ /* Add delay to filter out noises during initial link setup */
+ msleep(50);
+
+ return status;
+}
+
+/**
+ * ixgbe_validate_link_ready - Function looks for phy link
+ * @hw: pointer to hardware structure
+ *
+ * Function indicates success when phy link is available. If phy is not ready
+ * within 5 seconds of MAC indicating link, the function returns error.
+ **/
+static s32 ixgbe_validate_link_ready(struct ixgbe_hw *hw)
+{
+ u32 timeout;
+ u16 an_reg;
+
+ if (hw->device_id != IXGBE_DEV_ID_82598AT2)
+ return 0;
+
+ for (timeout = 0;
+ timeout < IXGBE_VALIDATE_LINK_READY_TIMEOUT; timeout++) {
+ hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_STATUS,
+ IXGBE_MDIO_AUTO_NEG_DEV_TYPE, &an_reg);
+
+ if ((an_reg & IXGBE_MII_AUTONEG_COMPLETE) &&
+ (an_reg & IXGBE_MII_AUTONEG_LINK_UP))
+ break;
+
+ msleep(100);
+ }
+
+ if (timeout == IXGBE_VALIDATE_LINK_READY_TIMEOUT) {
+ hw_dbg(hw, "Link was indicated but link is down\n");
+ return IXGBE_ERR_LINK_SETUP;
+ }
+
+ return 0;
+}
+
+/**
+ * ixgbe_check_mac_link_82598 - Get link/speed status
+ * @hw: pointer to hardware structure
+ * @speed: pointer to link speed
+ * @link_up: true is link is up, false otherwise
+ * @link_up_wait_to_complete: bool used to wait for link up or not
+ *
+ * Reads the links register to determine if link is up and the current speed
+ **/
+static s32 ixgbe_check_mac_link_82598(struct ixgbe_hw *hw,
+ ixgbe_link_speed *speed, bool *link_up,
+ bool link_up_wait_to_complete)
+{
+ u32 links_reg;
+ u32 i;
+ u16 link_reg, adapt_comp_reg;
+
+ /*
+ * SERDES PHY requires us to read link status from undocumented
+ * register 0xC79F. Bit 0 set indicates link is up/ready; clear
+ * indicates link down. OxC00C is read to check that the XAUI lanes
+ * are active. Bit 0 clear indicates active; set indicates inactive.
+ */
+ if (hw->phy.type == ixgbe_phy_nl) {
+ hw->phy.ops.read_reg(hw, 0xC79F, IXGBE_TWINAX_DEV, &link_reg);
+ hw->phy.ops.read_reg(hw, 0xC79F, IXGBE_TWINAX_DEV, &link_reg);
+ hw->phy.ops.read_reg(hw, 0xC00C, IXGBE_TWINAX_DEV,
+ &adapt_comp_reg);
+ if (link_up_wait_to_complete) {
+ for (i = 0; i < IXGBE_LINK_UP_TIME; i++) {
+ if ((link_reg & 1) &&
+ ((adapt_comp_reg & 1) == 0)) {
+ *link_up = true;
+ break;
+ } else {
+ *link_up = false;
+ }
+ msleep(100);
+ hw->phy.ops.read_reg(hw, 0xC79F,
+ IXGBE_TWINAX_DEV,
+ &link_reg);
+ hw->phy.ops.read_reg(hw, 0xC00C,
+ IXGBE_TWINAX_DEV,
+ &adapt_comp_reg);
+ }
+ } else {
+ if ((link_reg & 1) && ((adapt_comp_reg & 1) == 0))
+ *link_up = true;
+ else
+ *link_up = false;
+ }
+
+ if (*link_up == false)
+ goto out;
+ }
+
+ links_reg = IXGBE_READ_REG(hw, IXGBE_LINKS);
+ if (link_up_wait_to_complete) {
+ for (i = 0; i < IXGBE_LINK_UP_TIME; i++) {
+ if (links_reg & IXGBE_LINKS_UP) {
+ *link_up = true;
+ break;
+ } else {
+ *link_up = false;
+ }
+ msleep(100);
+ links_reg = IXGBE_READ_REG(hw, IXGBE_LINKS);
+ }
+ } else {
+ if (links_reg & IXGBE_LINKS_UP)
+ *link_up = true;
+ else
+ *link_up = false;
+ }
+
+ if (links_reg & IXGBE_LINKS_SPEED)
+ *speed = IXGBE_LINK_SPEED_10GB_FULL;
+ else
+ *speed = IXGBE_LINK_SPEED_1GB_FULL;
+
+ if ((hw->device_id == IXGBE_DEV_ID_82598AT2) && (*link_up == true) &&
+ (ixgbe_validate_link_ready(hw) != 0))
+ *link_up = false;
+
+out:
+ return 0;
+}
+
+/**
+ * ixgbe_setup_mac_link_82598 - Set MAC link speed
+ * @hw: pointer to hardware structure
+ * @speed: new link speed
+ * @autoneg: true if autonegotiation enabled
+ * @autoneg_wait_to_complete: true when waiting for completion is needed
+ *
+ * Set the link speed in the AUTOC register and restarts link.
+ **/
+static s32 ixgbe_setup_mac_link_82598(struct ixgbe_hw *hw,
+ ixgbe_link_speed speed, bool autoneg,
+ bool autoneg_wait_to_complete)
+{
+ s32 status = 0;
+ ixgbe_link_speed link_capabilities = IXGBE_LINK_SPEED_UNKNOWN;
+ u32 curr_autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC);
+ u32 autoc = curr_autoc;
+ u32 link_mode = autoc & IXGBE_AUTOC_LMS_MASK;
+
+ /* Check to see if speed passed in is supported. */
+ ixgbe_get_link_capabilities(hw, &link_capabilities, &autoneg);
+ speed &= link_capabilities;
+
+ if (speed == IXGBE_LINK_SPEED_UNKNOWN)
+ status = IXGBE_ERR_LINK_SETUP;
+
+ /* Set KX4/KX support according to speed requested */
+ else if (link_mode == IXGBE_AUTOC_LMS_KX4_AN ||
+ link_mode == IXGBE_AUTOC_LMS_KX4_AN_1G_AN) {
+ autoc &= ~IXGBE_AUTOC_KX4_KX_SUPP_MASK;
+ if (speed & IXGBE_LINK_SPEED_10GB_FULL)
+ autoc |= IXGBE_AUTOC_KX4_SUPP;
+ if (speed & IXGBE_LINK_SPEED_1GB_FULL)
+ autoc |= IXGBE_AUTOC_KX_SUPP;
+ if (autoc != curr_autoc)
+ IXGBE_WRITE_REG(hw, IXGBE_AUTOC, autoc);
+ }
+
+ if (status == 0) {
+ /*
+ * Setup and restart the link based on the new values in
+ * ixgbe_hw This will write the AUTOC register based on the new
+ * stored values
+ */
+ status = ixgbe_start_mac_link_82598(hw,
+ autoneg_wait_to_complete);
+ }
+
+ return status;
+}
+
+
+/**
+ * ixgbe_setup_copper_link_82598 - Set the PHY autoneg advertised field
+ * @hw: pointer to hardware structure
+ * @speed: new link speed
+ * @autoneg: true if autonegotiation enabled
+ * @autoneg_wait_to_complete: true if waiting is needed to complete
+ *
+ * Sets the link speed in the AUTOC register in the MAC and restarts link.
+ **/
+static s32 ixgbe_setup_copper_link_82598(struct ixgbe_hw *hw,
+ ixgbe_link_speed speed,
+ bool autoneg,
+ bool autoneg_wait_to_complete)
+{
+ s32 status;
+
+ /* Setup the PHY according to input speed */
+ status = hw->phy.ops.setup_link_speed(hw, speed, autoneg,
+ autoneg_wait_to_complete);
+ /* Set up MAC */
+ ixgbe_start_mac_link_82598(hw, autoneg_wait_to_complete);
+
+ return status;
+}
+
+/**
+ * ixgbe_reset_hw_82598 - Performs hardware reset
+ * @hw: pointer to hardware structure
+ *
+ * Resets the hardware by resetting the transmit and receive units, masks and
+ * clears all interrupts, performing a PHY reset, and performing a link (MAC)
+ * reset.
+ **/
+static s32 ixgbe_reset_hw_82598(struct ixgbe_hw *hw)
+{
+ s32 status = 0;
+ s32 phy_status = 0;
+ u32 ctrl;
+ u32 gheccr;
+ u32 i;
+ u32 autoc;
+ u8 analog_val;
+
+ /* Call adapter stop to disable tx/rx and clear interrupts */
+ status = hw->mac.ops.stop_adapter(hw);
+ if (status != 0)
+ goto reset_hw_out;
+
+ /*
+ * Power up the Atlas Tx lanes if they are currently powered down.
+ * Atlas Tx lanes are powered down for MAC loopback tests, but
+ * they are not automatically restored on reset.
+ */
+ hw->mac.ops.read_analog_reg8(hw, IXGBE_ATLAS_PDN_LPBK, &analog_val);
+ if (analog_val & IXGBE_ATLAS_PDN_TX_REG_EN) {
+ /* Enable Tx Atlas so packets can be transmitted again */
+ hw->mac.ops.read_analog_reg8(hw, IXGBE_ATLAS_PDN_LPBK,
+ &analog_val);
+ analog_val &= ~IXGBE_ATLAS_PDN_TX_REG_EN;
+ hw->mac.ops.write_analog_reg8(hw, IXGBE_ATLAS_PDN_LPBK,
+ analog_val);
+
+ hw->mac.ops.read_analog_reg8(hw, IXGBE_ATLAS_PDN_10G,
+ &analog_val);
+ analog_val &= ~IXGBE_ATLAS_PDN_TX_10G_QL_ALL;
+ hw->mac.ops.write_analog_reg8(hw, IXGBE_ATLAS_PDN_10G,
+ analog_val);
+
+ hw->mac.ops.read_analog_reg8(hw, IXGBE_ATLAS_PDN_1G,
+ &analog_val);
+ analog_val &= ~IXGBE_ATLAS_PDN_TX_1G_QL_ALL;
+ hw->mac.ops.write_analog_reg8(hw, IXGBE_ATLAS_PDN_1G,
+ analog_val);
+
+ hw->mac.ops.read_analog_reg8(hw, IXGBE_ATLAS_PDN_AN,
+ &analog_val);
+ analog_val &= ~IXGBE_ATLAS_PDN_TX_AN_QL_ALL;
+ hw->mac.ops.write_analog_reg8(hw, IXGBE_ATLAS_PDN_AN,
+ analog_val);
+ }
+
+ /* Reset PHY */
+ if (hw->phy.reset_disable == false) {
+ /* PHY ops must be identified and initialized prior to reset */
+
+ /* Init PHY and function pointers, perform SFP setup */
+ phy_status = hw->phy.ops.init(hw);
+ if (phy_status == IXGBE_ERR_SFP_NOT_SUPPORTED)
+ goto reset_hw_out;
+ if (phy_status == IXGBE_ERR_SFP_NOT_PRESENT)
+ goto mac_reset_top;
+
+ hw->phy.ops.reset(hw);
+ }
+
+mac_reset_top:
+ /*
+ * Issue global reset to the MAC. This needs to be a SW reset.
+ * If link reset is used, it might reset the MAC when mng is using it
+ */
+ ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL) | IXGBE_CTRL_RST;
+ IXGBE_WRITE_REG(hw, IXGBE_CTRL, ctrl);
+ IXGBE_WRITE_FLUSH(hw);
+
+ /* Poll for reset bit to self-clear indicating reset is complete */
+ for (i = 0; i < 10; i++) {
+ udelay(1);
+ ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL);
+ if (!(ctrl & IXGBE_CTRL_RST))
+ break;
+ }
+ if (ctrl & IXGBE_CTRL_RST) {
+ status = IXGBE_ERR_RESET_FAILED;
+ hw_dbg(hw, "Reset polling failed to complete.\n");
+ }
+
+ msleep(50);
+
+ /*
+ * Double resets are required for recovery from certain error
+ * conditions. Between resets, it is necessary to stall to allow time
+ * for any pending HW events to complete.
+ */
+ if (hw->mac.flags & IXGBE_FLAGS_DOUBLE_RESET_REQUIRED) {
+ hw->mac.flags &= ~IXGBE_FLAGS_DOUBLE_RESET_REQUIRED;
+ goto mac_reset_top;
+ }
+
+ gheccr = IXGBE_READ_REG(hw, IXGBE_GHECCR);
+ gheccr &= ~((1 << 21) | (1 << 18) | (1 << 9) | (1 << 6));
+ IXGBE_WRITE_REG(hw, IXGBE_GHECCR, gheccr);
+
+ /*
+ * Store the original AUTOC value if it has not been
+ * stored off yet. Otherwise restore the stored original
+ * AUTOC value since the reset operation sets back to deaults.
+ */
+ autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC);
+ if (hw->mac.orig_link_settings_stored == false) {
+ hw->mac.orig_autoc = autoc;
+ hw->mac.orig_link_settings_stored = true;
+ } else if (autoc != hw->mac.orig_autoc) {
+ IXGBE_WRITE_REG(hw, IXGBE_AUTOC, hw->mac.orig_autoc);
+ }
+
+ /* Store the permanent mac address */
+ hw->mac.ops.get_mac_addr(hw, hw->mac.perm_addr);
+
+ /*
+ * Store MAC address from RAR0, clear receive address registers, and
+ * clear the multicast table
+ */
+ hw->mac.ops.init_rx_addrs(hw);
+
+reset_hw_out:
+ if (phy_status != 0)
+ status = phy_status;
+
+ return status;
+}
+
+/**
+ * ixgbe_set_vmdq_82598 - Associate a VMDq set index with a rx address
+ * @hw: pointer to hardware struct
+ * @rar: receive address register index to associate with a VMDq index
+ * @vmdq: VMDq set index
+ **/
+s32 ixgbe_set_vmdq_82598(struct ixgbe_hw *hw, u32 rar, u32 vmdq)
+{
+ u32 rar_high;
+ u32 rar_entries = hw->mac.num_rar_entries;
+
+ /* Make sure we are using a valid rar index range */
+ if (rar >= rar_entries) {
+ hw_dbg(hw, "RAR index %d is out of range.\n", rar);
+ return IXGBE_ERR_INVALID_ARGUMENT;
+ }
+
+ rar_high = IXGBE_READ_REG(hw, IXGBE_RAH(rar));
+ rar_high &= ~IXGBE_RAH_VIND_MASK;
+ rar_high |= ((vmdq << IXGBE_RAH_VIND_SHIFT) & IXGBE_RAH_VIND_MASK);
+ IXGBE_WRITE_REG(hw, IXGBE_RAH(rar), rar_high);
+ return 0;
+}
+
+/**
+ * ixgbe_clear_vmdq_82598 - Disassociate a VMDq set index from an rx address
+ * @hw: pointer to hardware struct
+ * @rar: receive address register index to associate with a VMDq index
+ * @vmdq: VMDq clear index (not used in 82598, but elsewhere)
+ **/
+static s32 ixgbe_clear_vmdq_82598(struct ixgbe_hw *hw, u32 rar, u32 vmdq)
+{
+ u32 rar_high;
+ u32 rar_entries = hw->mac.num_rar_entries;
+
+
+ /* Make sure we are using a valid rar index range */
+ if (rar >= rar_entries) {
+ hw_dbg(hw, "RAR index %d is out of range.\n", rar);
+ return IXGBE_ERR_INVALID_ARGUMENT;
+ }
+
+ rar_high = IXGBE_READ_REG(hw, IXGBE_RAH(rar));
+ if (rar_high & IXGBE_RAH_VIND_MASK) {
+ rar_high &= ~IXGBE_RAH_VIND_MASK;
+ IXGBE_WRITE_REG(hw, IXGBE_RAH(rar), rar_high);
+ }
+
+ return 0;
+}
+
+/**
+ * ixgbe_set_vfta_82598 - Set VLAN filter table
+ * @hw: pointer to hardware structure
+ * @vlan: VLAN id to write to VLAN filter
+ * @vind: VMDq output index that maps queue to VLAN id in VFTA
+ * @vlan_on: boolean flag to turn on/off VLAN in VFTA
+ *
+ * Turn on/off specified VLAN in the VLAN filter table.
+ **/
+s32 ixgbe_set_vfta_82598(struct ixgbe_hw *hw, u32 vlan, u32 vind,
+ bool vlan_on)
+{
+ u32 regindex;
+ u32 bitindex;
+ u32 bits;
+ u32 vftabyte;
+
+ if (vlan > 4095)
+ return IXGBE_ERR_PARAM;
+
+ /* Determine 32-bit word position in array */
+ regindex = (vlan >> 5) & 0x7F; /* upper seven bits */
+
+ /* Determine the location of the (VMD) queue index */
+ vftabyte = ((vlan >> 3) & 0x03); /* bits (4:3) indicating byte array */
+ bitindex = (vlan & 0x7) << 2; /* lower 3 bits indicate nibble */
+
+ /* Set the nibble for VMD queue index */
+ bits = IXGBE_READ_REG(hw, IXGBE_VFTAVIND(vftabyte, regindex));
+ bits &= (~(0x0F << bitindex));
+ bits |= (vind << bitindex);
+ IXGBE_WRITE_REG(hw, IXGBE_VFTAVIND(vftabyte, regindex), bits);
+
+ /* Determine the location of the bit for this VLAN id */
+ bitindex = vlan & 0x1F; /* lower five bits */
+
+ bits = IXGBE_READ_REG(hw, IXGBE_VFTA(regindex));
+ if (vlan_on)
+ /* Turn on this VLAN id */
+ bits |= (1 << bitindex);
+ else
+ /* Turn off this VLAN id */
+ bits &= ~(1 << bitindex);
+ IXGBE_WRITE_REG(hw, IXGBE_VFTA(regindex), bits);
+
+ return 0;
+}
+
+/**
+ * ixgbe_clear_vfta_82598 - Clear VLAN filter table
+ * @hw: pointer to hardware structure
+ *
+ * Clears the VLAN filer table, and the VMDq index associated with the filter
+ **/
+static s32 ixgbe_clear_vfta_82598(struct ixgbe_hw *hw)
+{
+ u32 offset;
+ u32 vlanbyte;
+
+ for (offset = 0; offset < hw->mac.vft_size; offset++)
+ IXGBE_WRITE_REG(hw, IXGBE_VFTA(offset), 0);
+
+ for (vlanbyte = 0; vlanbyte < 4; vlanbyte++)
+ for (offset = 0; offset < hw->mac.vft_size; offset++)
+ IXGBE_WRITE_REG(hw, IXGBE_VFTAVIND(vlanbyte, offset),
+ 0);
+
+ return 0;
+}
+
+/**
+ * ixgbe_read_analog_reg8_82598 - Reads 8 bit Atlas analog register
+ * @hw: pointer to hardware structure
+ * @reg: analog register to read
+ * @val: read value
+ *
+ * Performs read operation to Atlas analog register specified.
+ **/
+s32 ixgbe_read_analog_reg8_82598(struct ixgbe_hw *hw, u32 reg, u8 *val)
+{
+ u32 atlas_ctl;
+
+ IXGBE_WRITE_REG(hw, IXGBE_ATLASCTL,
+ IXGBE_ATLASCTL_WRITE_CMD | (reg << 8));
+ IXGBE_WRITE_FLUSH(hw);
+ udelay(10);
+ atlas_ctl = IXGBE_READ_REG(hw, IXGBE_ATLASCTL);
+ *val = (u8)atlas_ctl;
+
+ return 0;
+}
+
+/**
+ * ixgbe_write_analog_reg8_82598 - Writes 8 bit Atlas analog register
+ * @hw: pointer to hardware structure
+ * @reg: atlas register to write
+ * @val: value to write
+ *
+ * Performs write operation to Atlas analog register specified.
+ **/
+s32 ixgbe_write_analog_reg8_82598(struct ixgbe_hw *hw, u32 reg, u8 val)
+{
+ u32 atlas_ctl;
+
+ atlas_ctl = (reg << 8) | val;
+ IXGBE_WRITE_REG(hw, IXGBE_ATLASCTL, atlas_ctl);
+ IXGBE_WRITE_FLUSH(hw);
+ udelay(10);
+
+ return 0;
+}
+
+/**
+ * ixgbe_read_i2c_eeprom_82598 - Reads 8 bit word over I2C interface.
+ * @hw: pointer to hardware structure
+ * @byte_offset: EEPROM byte offset to read
+ * @eeprom_data: value read
+ *
+ * Performs 8 byte read operation to SFP module's EEPROM over I2C interface.
+ **/
+s32 ixgbe_read_i2c_eeprom_82598(struct ixgbe_hw *hw, u8 byte_offset,
+ u8 *eeprom_data)
+{
+ s32 status = 0;
+ u16 sfp_addr = 0;
+ u16 sfp_data = 0;
+ u16 sfp_stat = 0;
+ u32 i;
+
+ if (hw->phy.type == ixgbe_phy_nl) {
+ /*
+ * NetLogic phy SDA/SCL registers are at addresses 0xC30A to
+ * 0xC30D. These registers are used to talk to the SFP+
+ * module's EEPROM through the SDA/SCL (I2C) interface.
+ */
+ sfp_addr = (IXGBE_I2C_EEPROM_DEV_ADDR << 8) + byte_offset;
+ sfp_addr = (sfp_addr | IXGBE_I2C_EEPROM_READ_MASK);
+ hw->phy.ops.write_reg(hw,
+ IXGBE_MDIO_PMA_PMD_SDA_SCL_ADDR,
+ IXGBE_MDIO_PMA_PMD_DEV_TYPE,
+ sfp_addr);
+
+ /* Poll status */
+ for (i = 0; i < 100; i++) {
+ hw->phy.ops.read_reg(hw,
+ IXGBE_MDIO_PMA_PMD_SDA_SCL_STAT,
+ IXGBE_MDIO_PMA_PMD_DEV_TYPE,
+ &sfp_stat);
+ sfp_stat = sfp_stat & IXGBE_I2C_EEPROM_STATUS_MASK;
+ if (sfp_stat != IXGBE_I2C_EEPROM_STATUS_IN_PROGRESS)
+ break;
+ msleep(10);
+ }
+
+ if (sfp_stat != IXGBE_I2C_EEPROM_STATUS_PASS) {
+ hw_dbg(hw, "EEPROM read did not pass.\n");
+ status = IXGBE_ERR_SFP_NOT_PRESENT;
+ goto out;
+ }
+
+ /* Read data */
+ hw->phy.ops.read_reg(hw, IXGBE_MDIO_PMA_PMD_SDA_SCL_DATA,
+ IXGBE_MDIO_PMA_PMD_DEV_TYPE, &sfp_data);
+
+ *eeprom_data = (u8)(sfp_data >> 8);
+ } else {
+ status = IXGBE_ERR_PHY;
+ goto out;
+ }
+
+out:
+ return status;
+}
+
+/**
+ * ixgbe_get_supported_physical_layer_82598 - Returns physical layer type
+ * @hw: pointer to hardware structure
+ *
+ * Determines physical layer capabilities of the current configuration.
+ **/
+u32 ixgbe_get_supported_physical_layer_82598(struct ixgbe_hw *hw)
+{
+ u32 physical_layer = IXGBE_PHYSICAL_LAYER_UNKNOWN;
+ u32 autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC);
+ u32 pma_pmd_10g = autoc & IXGBE_AUTOC_10G_PMA_PMD_MASK;
+ u32 pma_pmd_1g = autoc & IXGBE_AUTOC_1G_PMA_PMD_MASK;
+ u16 ext_ability = 0;
+
+ hw->phy.ops.identify(hw);
+
+ /* Copper PHY must be checked before AUTOC LMS to determine correct
+ * physical layer because 10GBase-T PHYs use LMS = KX4/KX */
+ switch (hw->phy.type) {
+ case ixgbe_phy_tn:
+ case ixgbe_phy_cu_unknown:
+ hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_EXT_ABILITY,
+ IXGBE_MDIO_PMA_PMD_DEV_TYPE, &ext_ability);
+ if (ext_ability & IXGBE_MDIO_PHY_10GBASET_ABILITY)
+ physical_layer |= IXGBE_PHYSICAL_LAYER_10GBASE_T;
+ if (ext_ability & IXGBE_MDIO_PHY_1000BASET_ABILITY)
+ physical_layer |= IXGBE_PHYSICAL_LAYER_1000BASE_T;
+ if (ext_ability & IXGBE_MDIO_PHY_100BASETX_ABILITY)
+ physical_layer |= IXGBE_PHYSICAL_LAYER_100BASE_TX;
+ goto out;
+ default:
+ break;
+ }
+
+ switch (autoc & IXGBE_AUTOC_LMS_MASK) {
+ case IXGBE_AUTOC_LMS_1G_AN:
+ case IXGBE_AUTOC_LMS_1G_LINK_NO_AN:
+ if (pma_pmd_1g == IXGBE_AUTOC_1G_KX)
+ physical_layer = IXGBE_PHYSICAL_LAYER_1000BASE_KX;
+ else
+ physical_layer = IXGBE_PHYSICAL_LAYER_1000BASE_BX;
+ break;
+ case IXGBE_AUTOC_LMS_10G_LINK_NO_AN:
+ if (pma_pmd_10g == IXGBE_AUTOC_10G_CX4)
+ physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_CX4;
+ else if (pma_pmd_10g == IXGBE_AUTOC_10G_KX4)
+ physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_KX4;
+ else /* XAUI */
+ physical_layer = IXGBE_PHYSICAL_LAYER_UNKNOWN;
+ break;
+ case IXGBE_AUTOC_LMS_KX4_AN:
+ case IXGBE_AUTOC_LMS_KX4_AN_1G_AN:
+ if (autoc & IXGBE_AUTOC_KX_SUPP)
+ physical_layer |= IXGBE_PHYSICAL_LAYER_1000BASE_KX;
+ if (autoc & IXGBE_AUTOC_KX4_SUPP)
+ physical_layer |= IXGBE_PHYSICAL_LAYER_10GBASE_KX4;
+ break;
+ default:
+ break;
+ }
+
+ if (hw->phy.type == ixgbe_phy_nl) {
+ hw->phy.ops.identify_sfp(hw);
+
+ switch (hw->phy.sfp_type) {
+ case ixgbe_sfp_type_da_cu:
+ physical_layer = IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU;
+ break;
+ case ixgbe_sfp_type_sr:
+ physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_SR;
+ break;
+ case ixgbe_sfp_type_lr:
+ physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_LR;
+ break;
+ default:
+ physical_layer = IXGBE_PHYSICAL_LAYER_UNKNOWN;
+ break;
+ }
+ }
+
+ switch (hw->device_id) {
+ case IXGBE_DEV_ID_82598_DA_DUAL_PORT:
+ physical_layer = IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU;
+ break;
+ case IXGBE_DEV_ID_82598AF_DUAL_PORT:
+ case IXGBE_DEV_ID_82598AF_SINGLE_PORT:
+ case IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM:
+ physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_SR;
+ break;
+ case IXGBE_DEV_ID_82598EB_XF_LR:
+ physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_LR;
+ break;
+ default:
+ break;
+ }
+
+out:
+ return physical_layer;
+}
+
+/**
+ * ixgbe_set_lan_id_multi_port_pcie_82598 - Set LAN id for PCIe multiple
+ * port devices.
+ * @hw: pointer to the HW structure
+ *
+ * Calls common function and corrects issue with some single port devices
+ * that enable LAN1 but not LAN0.
+ **/
+void ixgbe_set_lan_id_multi_port_pcie_82598(struct ixgbe_hw *hw)
+{
+ struct ixgbe_bus_info *bus = &hw->bus;
+ u16 pci_gen = 0;
+ u16 pci_ctrl2 = 0;
+
+ ixgbe_set_lan_id_multi_port_pcie(hw);
+
+ /* check if LAN0 is disabled */
+ hw->eeprom.ops.read(hw, IXGBE_PCIE_GENERAL_PTR, &pci_gen);
+ if ((pci_gen != 0) && (pci_gen != 0xFFFF)) {
+
+ hw->eeprom.ops.read(hw, pci_gen + IXGBE_PCIE_CTRL2, &pci_ctrl2);
+
+ /* if LAN0 is completely disabled force function to 0 */
+ if ((pci_ctrl2 & IXGBE_PCIE_CTRL2_LAN_DISABLE) &&
+ !(pci_ctrl2 & IXGBE_PCIE_CTRL2_DISABLE_SELECT) &&
+ !(pci_ctrl2 & IXGBE_PCIE_CTRL2_DUMMY_ENABLE)) {
+
+ bus->func = 0;
+ }
+ }
+}
+
+/**
+ * ixgbe_set_rxpba_82598 - Initialize RX packet buffer
+ * @hw: pointer to hardware structure
+ * @num_pb: number of packet buffers to allocate
+ * @headroom: reserve n KB of headroom
+ * @strategy: packet buffer allocation strategy
+ **/
+static void ixgbe_set_rxpba_82598(struct ixgbe_hw *hw, int num_pb,
+ u32 headroom, int strategy)
+{
+ u32 rxpktsize = IXGBE_RXPBSIZE_64KB;
+ u8 i = 0;
+
+ if (!num_pb)
+ return;
+
+ /* Setup Rx packet buffer sizes */
+ switch (strategy) {
+ case PBA_STRATEGY_WEIGHTED:
+ /* Setup the first four at 80KB */
+ rxpktsize = IXGBE_RXPBSIZE_80KB;
+ for (; i < 4; i++)
+ IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpktsize);
+ /* Setup the last four at 48KB...don't re-init i */
+ rxpktsize = IXGBE_RXPBSIZE_48KB;
+ /* Fall Through */
+ case PBA_STRATEGY_EQUAL:
+ default:
+ /* Divide the remaining Rx packet buffer evenly among the TCs */
+ for (; i < IXGBE_MAX_PACKET_BUFFERS; i++)
+ IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpktsize);
+ break;
+ }
+
+ /* Setup Tx packet buffer sizes */
+ for (i = 0; i < IXGBE_MAX_PACKET_BUFFERS; i++)
+ IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), IXGBE_TXPBSIZE_40KB);
+
+ return;
+}
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.h
new file mode 100644
index 00000000..c6abb020
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.h
@@ -0,0 +1,44 @@
+/*******************************************************************************
+
+ Intel 10 Gigabit PCI Express Linux driver
+ Copyright(c) 1999 - 2012 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _IXGBE_82598_H_
+#define _IXGBE_82598_H_
+
+u32 ixgbe_get_pcie_msix_count_82598(struct ixgbe_hw *hw);
+s32 ixgbe_fc_enable_82598(struct ixgbe_hw *hw);
+s32 ixgbe_start_hw_82598(struct ixgbe_hw *hw);
+s32 ixgbe_set_vmdq_82598(struct ixgbe_hw *hw, u32 rar, u32 vmdq);
+s32 ixgbe_set_vfta_82598(struct ixgbe_hw *hw, u32 vlan, u32 vind, bool vlan_on);
+s32 ixgbe_read_analog_reg8_82598(struct ixgbe_hw *hw, u32 reg, u8 *val);
+s32 ixgbe_write_analog_reg8_82598(struct ixgbe_hw *hw, u32 reg, u8 val);
+s32 ixgbe_read_i2c_eeprom_82598(struct ixgbe_hw *hw, u8 byte_offset,
+ u8 *eeprom_data);
+u32 ixgbe_get_supported_physical_layer_82598(struct ixgbe_hw *hw);
+s32 ixgbe_init_phy_ops_82598(struct ixgbe_hw *hw);
+void ixgbe_set_lan_id_multi_port_pcie_82598(struct ixgbe_hw *hw);
+void ixgbe_set_pcie_completion_timeout(struct ixgbe_hw *hw);
+#endif /* _IXGBE_82598_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c
new file mode 100644
index 00000000..017dfe16
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c
@@ -0,0 +1,2313 @@
+/*******************************************************************************
+
+ Intel 10 Gigabit PCI Express Linux driver
+ Copyright(c) 1999 - 2012 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#include "ixgbe_type.h"
+#include "ixgbe_82599.h"
+#include "ixgbe_api.h"
+#include "ixgbe_common.h"
+#include "ixgbe_phy.h"
+
+static s32 ixgbe_setup_copper_link_82599(struct ixgbe_hw *hw,
+ ixgbe_link_speed speed,
+ bool autoneg,
+ bool autoneg_wait_to_complete);
+static s32 ixgbe_verify_fw_version_82599(struct ixgbe_hw *hw);
+static s32 ixgbe_read_eeprom_82599(struct ixgbe_hw *hw,
+ u16 offset, u16 *data);
+static s32 ixgbe_read_eeprom_buffer_82599(struct ixgbe_hw *hw, u16 offset,
+ u16 words, u16 *data);
+static s32 ixgbe_read_i2c_byte_82599(struct ixgbe_hw *hw, u8 byte_offset,
+ u8 dev_addr, u8 *data);
+static s32 ixgbe_write_i2c_byte_82599(struct ixgbe_hw *hw, u8 byte_offset,
+ u8 dev_addr, u8 data);
+
+void ixgbe_init_mac_link_ops_82599(struct ixgbe_hw *hw)
+{
+ struct ixgbe_mac_info *mac = &hw->mac;
+
+ /* enable the laser control functions for SFP+ fiber */
+ if (mac->ops.get_media_type(hw) == ixgbe_media_type_fiber) {
+ mac->ops.disable_tx_laser =
+ &ixgbe_disable_tx_laser_multispeed_fiber;
+ mac->ops.enable_tx_laser =
+ &ixgbe_enable_tx_laser_multispeed_fiber;
+ mac->ops.flap_tx_laser = &ixgbe_flap_tx_laser_multispeed_fiber;
+
+ } else {
+ mac->ops.disable_tx_laser = NULL;
+ mac->ops.enable_tx_laser = NULL;
+ mac->ops.flap_tx_laser = NULL;
+ }
+
+ if (hw->phy.multispeed_fiber) {
+ /* Set up dual speed SFP+ support */
+ mac->ops.setup_link = &ixgbe_setup_mac_link_multispeed_fiber;
+ } else {
+ if ((ixgbe_get_media_type(hw) == ixgbe_media_type_backplane) &&
+ (hw->phy.smart_speed == ixgbe_smart_speed_auto ||
+ hw->phy.smart_speed == ixgbe_smart_speed_on) &&
+ !ixgbe_verify_lesm_fw_enabled_82599(hw)) {
+ mac->ops.setup_link = &ixgbe_setup_mac_link_smartspeed;
+ } else {
+ mac->ops.setup_link = &ixgbe_setup_mac_link_82599;
+ }
+ }
+}
+
+/**
+ * ixgbe_init_phy_ops_82599 - PHY/SFP specific init
+ * @hw: pointer to hardware structure
+ *
+ * Initialize any function pointers that were not able to be
+ * set during init_shared_code because the PHY/SFP type was
+ * not known. Perform the SFP init if necessary.
+ *
+ **/
+s32 ixgbe_init_phy_ops_82599(struct ixgbe_hw *hw)
+{
+ struct ixgbe_mac_info *mac = &hw->mac;
+ struct ixgbe_phy_info *phy = &hw->phy;
+ s32 ret_val = 0;
+ u32 esdp;
+
+ if (hw->device_id == IXGBE_DEV_ID_82599_QSFP_SF_QP) {
+ /* Store flag indicating I2C bus access control unit. */
+ hw->phy.qsfp_shared_i2c_bus = TRUE;
+
+ /* Initialize access to QSFP+ I2C bus */
+ esdp = IXGBE_READ_REG(hw, IXGBE_ESDP);
+ esdp |= IXGBE_ESDP_SDP0_DIR;
+ esdp &= ~IXGBE_ESDP_SDP1_DIR;
+ esdp &= ~IXGBE_ESDP_SDP0;
+ esdp &= ~IXGBE_ESDP_SDP0_NATIVE;
+ esdp &= ~IXGBE_ESDP_SDP1_NATIVE;
+ IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp);
+ IXGBE_WRITE_FLUSH(hw);
+
+ phy->ops.read_i2c_byte = &ixgbe_read_i2c_byte_82599;
+ phy->ops.write_i2c_byte = &ixgbe_write_i2c_byte_82599;
+ }
+ /* Identify the PHY or SFP module */
+ ret_val = phy->ops.identify(hw);
+ if (ret_val == IXGBE_ERR_SFP_NOT_SUPPORTED)
+ goto init_phy_ops_out;
+
+ /* Setup function pointers based on detected SFP module and speeds */
+ ixgbe_init_mac_link_ops_82599(hw);
+ if (hw->phy.sfp_type != ixgbe_sfp_type_unknown)
+ hw->phy.ops.reset = NULL;
+
+ /* If copper media, overwrite with copper function pointers */
+ if (mac->ops.get_media_type(hw) == ixgbe_media_type_copper) {
+ mac->ops.setup_link = &ixgbe_setup_copper_link_82599;
+ mac->ops.get_link_capabilities =
+ &ixgbe_get_copper_link_capabilities_generic;
+ }
+
+ /* Set necessary function pointers based on phy type */
+ switch (hw->phy.type) {
+ case ixgbe_phy_tn:
+ phy->ops.setup_link = &ixgbe_setup_phy_link_tnx;
+ phy->ops.check_link = &ixgbe_check_phy_link_tnx;
+ phy->ops.get_firmware_version =
+ &ixgbe_get_phy_firmware_version_tnx;
+ break;
+ default:
+ break;
+ }
+init_phy_ops_out:
+ return ret_val;
+}
+
+s32 ixgbe_setup_sfp_modules_82599(struct ixgbe_hw *hw)
+{
+ s32 ret_val = 0;
+ u32 reg_anlp1 = 0;
+ u32 i = 0;
+ u16 list_offset, data_offset, data_value;
+
+ if (hw->phy.sfp_type != ixgbe_sfp_type_unknown) {
+ ixgbe_init_mac_link_ops_82599(hw);
+
+ hw->phy.ops.reset = NULL;
+
+ ret_val = ixgbe_get_sfp_init_sequence_offsets(hw, &list_offset,
+ &data_offset);
+ if (ret_val != 0)
+ goto setup_sfp_out;
+
+ /* PHY config will finish before releasing the semaphore */
+ ret_val = hw->mac.ops.acquire_swfw_sync(hw,
+ IXGBE_GSSR_MAC_CSR_SM);
+ if (ret_val != 0) {
+ ret_val = IXGBE_ERR_SWFW_SYNC;
+ goto setup_sfp_out;
+ }
+
+ hw->eeprom.ops.read(hw, ++data_offset, &data_value);
+ while (data_value != 0xffff) {
+ IXGBE_WRITE_REG(hw, IXGBE_CORECTL, data_value);
+ IXGBE_WRITE_FLUSH(hw);
+ hw->eeprom.ops.read(hw, ++data_offset, &data_value);
+ }
+
+ /* Release the semaphore */
+ hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
+ /* Delay obtaining semaphore again to allow FW access */
+ msleep(hw->eeprom.semaphore_delay);
+
+ /* Now restart DSP by setting Restart_AN and clearing LMS */
+ IXGBE_WRITE_REG(hw, IXGBE_AUTOC, ((IXGBE_READ_REG(hw,
+ IXGBE_AUTOC) & ~IXGBE_AUTOC_LMS_MASK) |
+ IXGBE_AUTOC_AN_RESTART));
+
+ /* Wait for AN to leave state 0 */
+ for (i = 0; i < 10; i++) {
+ msleep(4);
+ reg_anlp1 = IXGBE_READ_REG(hw, IXGBE_ANLP1);
+ if (reg_anlp1 & IXGBE_ANLP1_AN_STATE_MASK)
+ break;
+ }
+ if (!(reg_anlp1 & IXGBE_ANLP1_AN_STATE_MASK)) {
+ hw_dbg(hw, "sfp module setup not complete\n");
+ ret_val = IXGBE_ERR_SFP_SETUP_NOT_COMPLETE;
+ goto setup_sfp_out;
+ }
+
+ /* Restart DSP by setting Restart_AN and return to SFI mode */
+ IXGBE_WRITE_REG(hw, IXGBE_AUTOC, (IXGBE_READ_REG(hw,
+ IXGBE_AUTOC) | IXGBE_AUTOC_LMS_10G_SERIAL |
+ IXGBE_AUTOC_AN_RESTART));
+ }
+
+setup_sfp_out:
+ return ret_val;
+}
+
+/**
+ * ixgbe_init_ops_82599 - Inits func ptrs and MAC type
+ * @hw: pointer to hardware structure
+ *
+ * Initialize the function pointers and assign the MAC type for 82599.
+ * Does not touch the hardware.
+ **/
+
+s32 ixgbe_init_ops_82599(struct ixgbe_hw *hw)
+{
+ struct ixgbe_mac_info *mac = &hw->mac;
+ struct ixgbe_phy_info *phy = &hw->phy;
+ struct ixgbe_eeprom_info *eeprom = &hw->eeprom;
+ s32 ret_val;
+
+ ixgbe_init_phy_ops_generic(hw);
+ ret_val = ixgbe_init_ops_generic(hw);
+
+ /* PHY */
+ phy->ops.identify = &ixgbe_identify_phy_82599;
+ phy->ops.init = &ixgbe_init_phy_ops_82599;
+
+ /* MAC */
+ mac->ops.reset_hw = &ixgbe_reset_hw_82599;
+ mac->ops.get_media_type = &ixgbe_get_media_type_82599;
+ mac->ops.get_supported_physical_layer =
+ &ixgbe_get_supported_physical_layer_82599;
+ mac->ops.disable_sec_rx_path = &ixgbe_disable_sec_rx_path_generic;
+ mac->ops.enable_sec_rx_path = &ixgbe_enable_sec_rx_path_generic;
+ mac->ops.enable_rx_dma = &ixgbe_enable_rx_dma_82599;
+ mac->ops.read_analog_reg8 = &ixgbe_read_analog_reg8_82599;
+ mac->ops.write_analog_reg8 = &ixgbe_write_analog_reg8_82599;
+ mac->ops.start_hw = &ixgbe_start_hw_82599;
+ mac->ops.get_san_mac_addr = &ixgbe_get_san_mac_addr_generic;
+ mac->ops.set_san_mac_addr = &ixgbe_set_san_mac_addr_generic;
+ mac->ops.get_device_caps = &ixgbe_get_device_caps_generic;
+ mac->ops.get_wwn_prefix = &ixgbe_get_wwn_prefix_generic;
+ mac->ops.get_fcoe_boot_status = &ixgbe_get_fcoe_boot_status_generic;
+
+ /* RAR, Multicast, VLAN */
+ mac->ops.set_vmdq = &ixgbe_set_vmdq_generic;
+ mac->ops.set_vmdq_san_mac = &ixgbe_set_vmdq_san_mac_generic;
+ mac->ops.clear_vmdq = &ixgbe_clear_vmdq_generic;
+ mac->ops.insert_mac_addr = &ixgbe_insert_mac_addr_generic;
+ mac->rar_highwater = 1;
+ mac->ops.set_vfta = &ixgbe_set_vfta_generic;
+ mac->ops.set_vlvf = &ixgbe_set_vlvf_generic;
+ mac->ops.clear_vfta = &ixgbe_clear_vfta_generic;
+ mac->ops.init_uta_tables = &ixgbe_init_uta_tables_generic;
+ mac->ops.setup_sfp = &ixgbe_setup_sfp_modules_82599;
+ mac->ops.set_mac_anti_spoofing = &ixgbe_set_mac_anti_spoofing;
+ mac->ops.set_vlan_anti_spoofing = &ixgbe_set_vlan_anti_spoofing;
+
+ /* Link */
+ mac->ops.get_link_capabilities = &ixgbe_get_link_capabilities_82599;
+ mac->ops.check_link = &ixgbe_check_mac_link_generic;
+ mac->ops.setup_rxpba = &ixgbe_set_rxpba_generic;
+ ixgbe_init_mac_link_ops_82599(hw);
+
+ mac->mcft_size = 128;
+ mac->vft_size = 128;
+ mac->num_rar_entries = 128;
+ mac->rx_pb_size = 512;
+ mac->max_tx_queues = 128;
+ mac->max_rx_queues = 128;
+ mac->max_msix_vectors = ixgbe_get_pcie_msix_count_generic(hw);
+
+ mac->arc_subsystem_valid = (IXGBE_READ_REG(hw, IXGBE_FWSM) &
+ IXGBE_FWSM_MODE_MASK) ? true : false;
+
+ //hw->mbx.ops.init_params = ixgbe_init_mbx_params_pf;
+
+ /* EEPROM */
+ eeprom->ops.read = &ixgbe_read_eeprom_82599;
+ eeprom->ops.read_buffer = &ixgbe_read_eeprom_buffer_82599;
+
+ /* Manageability interface */
+ mac->ops.set_fw_drv_ver = &ixgbe_set_fw_drv_ver_generic;
+
+ mac->ops.get_thermal_sensor_data =
+ &ixgbe_get_thermal_sensor_data_generic;
+ mac->ops.init_thermal_sensor_thresh =
+ &ixgbe_init_thermal_sensor_thresh_generic;
+
+ return ret_val;
+}
+
+/**
+ * ixgbe_get_link_capabilities_82599 - Determines link capabilities
+ * @hw: pointer to hardware structure
+ * @speed: pointer to link speed
+ * @negotiation: true when autoneg or autotry is enabled
+ *
+ * Determines the link capabilities by reading the AUTOC register.
+ **/
+s32 ixgbe_get_link_capabilities_82599(struct ixgbe_hw *hw,
+ ixgbe_link_speed *speed,
+ bool *negotiation)
+{
+ s32 status = 0;
+ u32 autoc = 0;
+
+ /* Check if 1G SFP module. */
+ if (hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core0 ||
+ hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core1 ||
+ hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core0 ||
+ hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1) {
+ *speed = IXGBE_LINK_SPEED_1GB_FULL;
+ *negotiation = true;
+ goto out;
+ }
+
+ /*
+ * Determine link capabilities based on the stored value of AUTOC,
+ * which represents EEPROM defaults. If AUTOC value has not
+ * been stored, use the current register values.
+ */
+ if (hw->mac.orig_link_settings_stored)
+ autoc = hw->mac.orig_autoc;
+ else
+ autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC);
+
+ switch (autoc & IXGBE_AUTOC_LMS_MASK) {
+ case IXGBE_AUTOC_LMS_1G_LINK_NO_AN:
+ *speed = IXGBE_LINK_SPEED_1GB_FULL;
+ *negotiation = false;
+ break;
+
+ case IXGBE_AUTOC_LMS_10G_LINK_NO_AN:
+ *speed = IXGBE_LINK_SPEED_10GB_FULL;
+ *negotiation = false;
+ break;
+
+ case IXGBE_AUTOC_LMS_1G_AN:
+ *speed = IXGBE_LINK_SPEED_1GB_FULL;
+ *negotiation = true;
+ break;
+
+ case IXGBE_AUTOC_LMS_10G_SERIAL:
+ *speed = IXGBE_LINK_SPEED_10GB_FULL;
+ *negotiation = false;
+ break;
+
+ case IXGBE_AUTOC_LMS_KX4_KX_KR:
+ case IXGBE_AUTOC_LMS_KX4_KX_KR_1G_AN:
+ *speed = IXGBE_LINK_SPEED_UNKNOWN;
+ if (autoc & IXGBE_AUTOC_KR_SUPP)
+ *speed |= IXGBE_LINK_SPEED_10GB_FULL;
+ if (autoc & IXGBE_AUTOC_KX4_SUPP)
+ *speed |= IXGBE_LINK_SPEED_10GB_FULL;
+ if (autoc & IXGBE_AUTOC_KX_SUPP)
+ *speed |= IXGBE_LINK_SPEED_1GB_FULL;
+ *negotiation = true;
+ break;
+
+ case IXGBE_AUTOC_LMS_KX4_KX_KR_SGMII:
+ *speed = IXGBE_LINK_SPEED_100_FULL;
+ if (autoc & IXGBE_AUTOC_KR_SUPP)
+ *speed |= IXGBE_LINK_SPEED_10GB_FULL;
+ if (autoc & IXGBE_AUTOC_KX4_SUPP)
+ *speed |= IXGBE_LINK_SPEED_10GB_FULL;
+ if (autoc & IXGBE_AUTOC_KX_SUPP)
+ *speed |= IXGBE_LINK_SPEED_1GB_FULL;
+ *negotiation = true;
+ break;
+
+ case IXGBE_AUTOC_LMS_SGMII_1G_100M:
+ *speed = IXGBE_LINK_SPEED_1GB_FULL | IXGBE_LINK_SPEED_100_FULL;
+ *negotiation = false;
+ break;
+
+ default:
+ status = IXGBE_ERR_LINK_SETUP;
+ goto out;
+ break;
+ }
+
+ if (hw->phy.multispeed_fiber) {
+ *speed |= IXGBE_LINK_SPEED_10GB_FULL |
+ IXGBE_LINK_SPEED_1GB_FULL;
+ *negotiation = true;
+ }
+
+out:
+ return status;
+}
+
+/**
+ * ixgbe_get_media_type_82599 - Get media type
+ * @hw: pointer to hardware structure
+ *
+ * Returns the media type (fiber, copper, backplane)
+ **/
+enum ixgbe_media_type ixgbe_get_media_type_82599(struct ixgbe_hw *hw)
+{
+ enum ixgbe_media_type media_type;
+
+ /* Detect if there is a copper PHY attached. */
+ switch (hw->phy.type) {
+ case ixgbe_phy_cu_unknown:
+ case ixgbe_phy_tn:
+ media_type = ixgbe_media_type_copper;
+ goto out;
+ default:
+ break;
+ }
+
+ switch (hw->device_id) {
+ case IXGBE_DEV_ID_82599_KX4:
+ case IXGBE_DEV_ID_82599_KX4_MEZZ:
+ case IXGBE_DEV_ID_82599_COMBO_BACKPLANE:
+ case IXGBE_DEV_ID_82599_KR:
+ case IXGBE_DEV_ID_82599_BACKPLANE_FCOE:
+ case IXGBE_DEV_ID_82599_XAUI_LOM:
+ /* Default device ID is mezzanine card KX/KX4 */
+ media_type = ixgbe_media_type_backplane;
+ break;
+ case IXGBE_DEV_ID_82599_SFP:
+ case IXGBE_DEV_ID_82599_SFP_FCOE:
+ case IXGBE_DEV_ID_82599_SFP_EM:
+ case IXGBE_DEV_ID_82599_SFP_SF2:
+ case IXGBE_DEV_ID_82599EN_SFP:
+ media_type = ixgbe_media_type_fiber;
+ break;
+ case IXGBE_DEV_ID_82599_CX4:
+ media_type = ixgbe_media_type_cx4;
+ break;
+ case IXGBE_DEV_ID_82599_T3_LOM:
+ media_type = ixgbe_media_type_copper;
+ break;
+ case IXGBE_DEV_ID_82599_LS:
+ media_type = ixgbe_media_type_fiber_lco;
+ break;
+ case IXGBE_DEV_ID_82599_QSFP_SF_QP:
+ media_type = ixgbe_media_type_fiber_qsfp;
+ break;
+ default:
+ media_type = ixgbe_media_type_unknown;
+ break;
+ }
+out:
+ return media_type;
+}
+
+/**
+ * ixgbe_start_mac_link_82599 - Setup MAC link settings
+ * @hw: pointer to hardware structure
+ * @autoneg_wait_to_complete: true when waiting for completion is needed
+ *
+ * Configures link settings based on values in the ixgbe_hw struct.
+ * Restarts the link. Performs autonegotiation if needed.
+ **/
+s32 ixgbe_start_mac_link_82599(struct ixgbe_hw *hw,
+ bool autoneg_wait_to_complete)
+{
+ u32 autoc_reg;
+ u32 links_reg = 0;
+ u32 i;
+ s32 status = 0;
+
+ /* Restart link */
+ autoc_reg = IXGBE_READ_REG(hw, IXGBE_AUTOC);
+ autoc_reg |= IXGBE_AUTOC_AN_RESTART;
+ IXGBE_WRITE_REG(hw, IXGBE_AUTOC, autoc_reg);
+
+ /* Only poll for autoneg to complete if specified to do so */
+ if (autoneg_wait_to_complete) {
+ if ((autoc_reg & IXGBE_AUTOC_LMS_MASK) ==
+ IXGBE_AUTOC_LMS_KX4_KX_KR ||
+ (autoc_reg & IXGBE_AUTOC_LMS_MASK) ==
+ IXGBE_AUTOC_LMS_KX4_KX_KR_1G_AN ||
+ (autoc_reg & IXGBE_AUTOC_LMS_MASK) ==
+ IXGBE_AUTOC_LMS_KX4_KX_KR_SGMII) {
+ for (i = 0; i < IXGBE_AUTO_NEG_TIME; i++) {
+ links_reg = IXGBE_READ_REG(hw, IXGBE_LINKS);
+ if (links_reg & IXGBE_LINKS_KX_AN_COMP)
+ break;
+ msleep(100);
+ }
+ if (!(links_reg & IXGBE_LINKS_KX_AN_COMP)) {
+ status = IXGBE_ERR_AUTONEG_NOT_COMPLETE;
+ hw_dbg(hw, "Autoneg did not complete.\n");
+ }
+ }
+ }
+
+ /* Add delay to filter out noises during initial link setup */
+ msleep(50);
+
+ return status;
+}
+
+/**
+ * ixgbe_disable_tx_laser_multispeed_fiber - Disable Tx laser
+ * @hw: pointer to hardware structure
+ *
+ * The base drivers may require better control over SFP+ module
+ * PHY states. This includes selectively shutting down the Tx
+ * laser on the PHY, effectively halting physical link.
+ **/
+void ixgbe_disable_tx_laser_multispeed_fiber(struct ixgbe_hw *hw)
+{
+ u32 esdp_reg = IXGBE_READ_REG(hw, IXGBE_ESDP);
+
+ /* Disable tx laser; allow 100us to go dark per spec */
+ esdp_reg |= IXGBE_ESDP_SDP3;
+ IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg);
+ IXGBE_WRITE_FLUSH(hw);
+ udelay(100);
+}
+
+/**
+ * ixgbe_enable_tx_laser_multispeed_fiber - Enable Tx laser
+ * @hw: pointer to hardware structure
+ *
+ * The base drivers may require better control over SFP+ module
+ * PHY states. This includes selectively turning on the Tx
+ * laser on the PHY, effectively starting physical link.
+ **/
+void ixgbe_enable_tx_laser_multispeed_fiber(struct ixgbe_hw *hw)
+{
+ u32 esdp_reg = IXGBE_READ_REG(hw, IXGBE_ESDP);
+
+ /* Enable tx laser; allow 100ms to light up */
+ esdp_reg &= ~IXGBE_ESDP_SDP3;
+ IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg);
+ IXGBE_WRITE_FLUSH(hw);
+ msleep(100);
+}
+
+/**
+ * ixgbe_flap_tx_laser_multispeed_fiber - Flap Tx laser
+ * @hw: pointer to hardware structure
+ *
+ * When the driver changes the link speeds that it can support,
+ * it sets autotry_restart to true to indicate that we need to
+ * initiate a new autotry session with the link partner. To do
+ * so, we set the speed then disable and re-enable the tx laser, to
+ * alert the link partner that it also needs to restart autotry on its
+ * end. This is consistent with true clause 37 autoneg, which also
+ * involves a loss of signal.
+ **/
+void ixgbe_flap_tx_laser_multispeed_fiber(struct ixgbe_hw *hw)
+{
+ if (hw->mac.autotry_restart) {
+ ixgbe_disable_tx_laser_multispeed_fiber(hw);
+ ixgbe_enable_tx_laser_multispeed_fiber(hw);
+ hw->mac.autotry_restart = false;
+ }
+}
+
+/**
+ * ixgbe_setup_mac_link_multispeed_fiber - Set MAC link speed
+ * @hw: pointer to hardware structure
+ * @speed: new link speed
+ * @autoneg: true if autonegotiation enabled
+ * @autoneg_wait_to_complete: true when waiting for completion is needed
+ *
+ * Set the link speed in the AUTOC register and restarts link.
+ **/
+s32 ixgbe_setup_mac_link_multispeed_fiber(struct ixgbe_hw *hw,
+ ixgbe_link_speed speed, bool autoneg,
+ bool autoneg_wait_to_complete)
+{
+ s32 status = 0;
+ ixgbe_link_speed link_speed = IXGBE_LINK_SPEED_UNKNOWN;
+ ixgbe_link_speed highest_link_speed = IXGBE_LINK_SPEED_UNKNOWN;
+ u32 speedcnt = 0;
+ u32 esdp_reg = IXGBE_READ_REG(hw, IXGBE_ESDP);
+ u32 i = 0;
+ bool link_up = false;
+ bool negotiation;
+
+ /* Mask off requested but non-supported speeds */
+ status = ixgbe_get_link_capabilities(hw, &link_speed, &negotiation);
+ if (status != 0)
+ return status;
+
+ speed &= link_speed;
+
+ /*
+ * Try each speed one by one, highest priority first. We do this in
+ * software because 10gb fiber doesn't support speed autonegotiation.
+ */
+ if (speed & IXGBE_LINK_SPEED_10GB_FULL) {
+ speedcnt++;
+ highest_link_speed = IXGBE_LINK_SPEED_10GB_FULL;
+
+ /* If we already have link at this speed, just jump out */
+ status = ixgbe_check_link(hw, &link_speed, &link_up, false);
+ if (status != 0)
+ return status;
+
+ if ((link_speed == IXGBE_LINK_SPEED_10GB_FULL) && link_up)
+ goto out;
+
+ /* Set the module link speed */
+ esdp_reg |= (IXGBE_ESDP_SDP5_DIR | IXGBE_ESDP_SDP5);
+ IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg);
+ IXGBE_WRITE_FLUSH(hw);
+
+ /* Allow module to change analog characteristics (1G->10G) */
+ msleep(40);
+
+ status = ixgbe_setup_mac_link_82599(hw,
+ IXGBE_LINK_SPEED_10GB_FULL,
+ autoneg,
+ autoneg_wait_to_complete);
+ if (status != 0)
+ return status;
+
+ /* Flap the tx laser if it has not already been done */
+ ixgbe_flap_tx_laser(hw);
+
+ /*
+ * Wait for the controller to acquire link. Per IEEE 802.3ap,
+ * Section 73.10.2, we may have to wait up to 500ms if KR is
+ * attempted. 82599 uses the same timing for 10g SFI.
+ */
+ for (i = 0; i < 5; i++) {
+ /* Wait for the link partner to also set speed */
+ msleep(100);
+
+ /* If we have link, just jump out */
+ status = ixgbe_check_link(hw, &link_speed,
+ &link_up, false);
+ if (status != 0)
+ return status;
+
+ if (link_up)
+ goto out;
+ }
+ }
+
+ if (speed & IXGBE_LINK_SPEED_1GB_FULL) {
+ speedcnt++;
+ if (highest_link_speed == IXGBE_LINK_SPEED_UNKNOWN)
+ highest_link_speed = IXGBE_LINK_SPEED_1GB_FULL;
+
+ /* If we already have link at this speed, just jump out */
+ status = ixgbe_check_link(hw, &link_speed, &link_up, false);
+ if (status != 0)
+ return status;
+
+ if ((link_speed == IXGBE_LINK_SPEED_1GB_FULL) && link_up)
+ goto out;
+
+ /* Set the module link speed */
+ esdp_reg &= ~IXGBE_ESDP_SDP5;
+ esdp_reg |= IXGBE_ESDP_SDP5_DIR;
+ IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg);
+ IXGBE_WRITE_FLUSH(hw);
+
+ /* Allow module to change analog characteristics (10G->1G) */
+ msleep(40);
+
+ status = ixgbe_setup_mac_link_82599(hw,
+ IXGBE_LINK_SPEED_1GB_FULL,
+ autoneg,
+ autoneg_wait_to_complete);
+ if (status != 0)
+ return status;
+
+ /* Flap the tx laser if it has not already been done */
+ ixgbe_flap_tx_laser(hw);
+
+ /* Wait for the link partner to also set speed */
+ msleep(100);
+
+ /* If we have link, just jump out */
+ status = ixgbe_check_link(hw, &link_speed, &link_up, false);
+ if (status != 0)
+ return status;
+
+ if (link_up)
+ goto out;
+ }
+
+ /*
+ * We didn't get link. Configure back to the highest speed we tried,
+ * (if there was more than one). We call ourselves back with just the
+ * single highest speed that the user requested.
+ */
+ if (speedcnt > 1)
+ status = ixgbe_setup_mac_link_multispeed_fiber(hw,
+ highest_link_speed, autoneg, autoneg_wait_to_complete);
+
+out:
+ /* Set autoneg_advertised value based on input link speed */
+ hw->phy.autoneg_advertised = 0;
+
+ if (speed & IXGBE_LINK_SPEED_10GB_FULL)
+ hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_10GB_FULL;
+
+ if (speed & IXGBE_LINK_SPEED_1GB_FULL)
+ hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_1GB_FULL;
+
+ return status;
+}
+
+/**
+ * ixgbe_setup_mac_link_smartspeed - Set MAC link speed using SmartSpeed
+ * @hw: pointer to hardware structure
+ * @speed: new link speed
+ * @autoneg: true if autonegotiation enabled
+ * @autoneg_wait_to_complete: true when waiting for completion is needed
+ *
+ * Implements the Intel SmartSpeed algorithm.
+ **/
+s32 ixgbe_setup_mac_link_smartspeed(struct ixgbe_hw *hw,
+ ixgbe_link_speed speed, bool autoneg,
+ bool autoneg_wait_to_complete)
+{
+ s32 status = 0;
+ ixgbe_link_speed link_speed = IXGBE_LINK_SPEED_UNKNOWN;
+ s32 i, j;
+ bool link_up = false;
+ u32 autoc_reg = IXGBE_READ_REG(hw, IXGBE_AUTOC);
+
+ /* Set autoneg_advertised value based on input link speed */
+ hw->phy.autoneg_advertised = 0;
+
+ if (speed & IXGBE_LINK_SPEED_10GB_FULL)
+ hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_10GB_FULL;
+
+ if (speed & IXGBE_LINK_SPEED_1GB_FULL)
+ hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_1GB_FULL;
+
+ if (speed & IXGBE_LINK_SPEED_100_FULL)
+ hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_100_FULL;
+
+ /*
+ * Implement Intel SmartSpeed algorithm. SmartSpeed will reduce the
+ * autoneg advertisement if link is unable to be established at the
+ * highest negotiated rate. This can sometimes happen due to integrity
+ * issues with the physical media connection.
+ */
+
+ /* First, try to get link with full advertisement */
+ hw->phy.smart_speed_active = false;
+ for (j = 0; j < IXGBE_SMARTSPEED_MAX_RETRIES; j++) {
+ status = ixgbe_setup_mac_link_82599(hw, speed, autoneg,
+ autoneg_wait_to_complete);
+ if (status != 0)
+ goto out;
+
+ /*
+ * Wait for the controller to acquire link. Per IEEE 802.3ap,
+ * Section 73.10.2, we may have to wait up to 500ms if KR is
+ * attempted, or 200ms if KX/KX4/BX/BX4 is attempted, per
+ * Table 9 in the AN MAS.
+ */
+ for (i = 0; i < 5; i++) {
+ msleep(100);
+
+ /* If we have link, just jump out */
+ status = ixgbe_check_link(hw, &link_speed, &link_up,
+ false);
+ if (status != 0)
+ goto out;
+
+ if (link_up)
+ goto out;
+ }
+ }
+
+ /*
+ * We didn't get link. If we advertised KR plus one of KX4/KX
+ * (or BX4/BX), then disable KR and try again.
+ */
+ if (((autoc_reg & IXGBE_AUTOC_KR_SUPP) == 0) ||
+ ((autoc_reg & IXGBE_AUTOC_KX4_KX_SUPP_MASK) == 0))
+ goto out;
+
+ /* Turn SmartSpeed on to disable KR support */
+ hw->phy.smart_speed_active = true;
+ status = ixgbe_setup_mac_link_82599(hw, speed, autoneg,
+ autoneg_wait_to_complete);
+ if (status != 0)
+ goto out;
+
+ /*
+ * Wait for the controller to acquire link. 600ms will allow for
+ * the AN link_fail_inhibit_timer as well for multiple cycles of
+ * parallel detect, both 10g and 1g. This allows for the maximum
+ * connect attempts as defined in the AN MAS table 73-7.
+ */
+ for (i = 0; i < 6; i++) {
+ msleep(100);
+
+ /* If we have link, just jump out */
+ status = ixgbe_check_link(hw, &link_speed, &link_up, false);
+ if (status != 0)
+ goto out;
+
+ if (link_up)
+ goto out;
+ }
+
+ /* We didn't get link. Turn SmartSpeed back off. */
+ hw->phy.smart_speed_active = false;
+ status = ixgbe_setup_mac_link_82599(hw, speed, autoneg,
+ autoneg_wait_to_complete);
+
+out:
+ if (link_up && (link_speed == IXGBE_LINK_SPEED_1GB_FULL))
+ hw_dbg(hw, "Smartspeed has downgraded the link speed "
+ "from the maximum advertised\n");
+ return status;
+}
+
+/**
+ * ixgbe_setup_mac_link_82599 - Set MAC link speed
+ * @hw: pointer to hardware structure
+ * @speed: new link speed
+ * @autoneg: true if autonegotiation enabled
+ * @autoneg_wait_to_complete: true when waiting for completion is needed
+ *
+ * Set the link speed in the AUTOC register and restarts link.
+ **/
+s32 ixgbe_setup_mac_link_82599(struct ixgbe_hw *hw,
+ ixgbe_link_speed speed, bool autoneg,
+ bool autoneg_wait_to_complete)
+{
+ s32 status = 0;
+ u32 autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC);
+ u32 autoc2 = IXGBE_READ_REG(hw, IXGBE_AUTOC2);
+ u32 start_autoc = autoc;
+ u32 orig_autoc = 0;
+ u32 link_mode = autoc & IXGBE_AUTOC_LMS_MASK;
+ u32 pma_pmd_1g = autoc & IXGBE_AUTOC_1G_PMA_PMD_MASK;
+ u32 pma_pmd_10g_serial = autoc2 & IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_MASK;
+ u32 links_reg = 0;
+ u32 i;
+ ixgbe_link_speed link_capabilities = IXGBE_LINK_SPEED_UNKNOWN;
+
+ /* Check to see if speed passed in is supported. */
+ status = ixgbe_get_link_capabilities(hw, &link_capabilities, &autoneg);
+ if (status != 0)
+ goto out;
+
+ speed &= link_capabilities;
+
+ if (speed == IXGBE_LINK_SPEED_UNKNOWN) {
+ status = IXGBE_ERR_LINK_SETUP;
+ goto out;
+ }
+
+ /* Use stored value (EEPROM defaults) of AUTOC to find KR/KX4 support*/
+ if (hw->mac.orig_link_settings_stored)
+ orig_autoc = hw->mac.orig_autoc;
+ else
+ orig_autoc = autoc;
+
+ if (link_mode == IXGBE_AUTOC_LMS_KX4_KX_KR ||
+ link_mode == IXGBE_AUTOC_LMS_KX4_KX_KR_1G_AN ||
+ link_mode == IXGBE_AUTOC_LMS_KX4_KX_KR_SGMII) {
+ /* Set KX4/KX/KR support according to speed requested */
+ autoc &= ~(IXGBE_AUTOC_KX4_KX_SUPP_MASK | IXGBE_AUTOC_KR_SUPP);
+ if (speed & IXGBE_LINK_SPEED_10GB_FULL)
+ if (orig_autoc & IXGBE_AUTOC_KX4_SUPP)
+ autoc |= IXGBE_AUTOC_KX4_SUPP;
+ if ((orig_autoc & IXGBE_AUTOC_KR_SUPP) &&
+ (hw->phy.smart_speed_active == false))
+ autoc |= IXGBE_AUTOC_KR_SUPP;
+ if (speed & IXGBE_LINK_SPEED_1GB_FULL)
+ autoc |= IXGBE_AUTOC_KX_SUPP;
+ } else if ((pma_pmd_1g == IXGBE_AUTOC_1G_SFI) &&
+ (link_mode == IXGBE_AUTOC_LMS_1G_LINK_NO_AN ||
+ link_mode == IXGBE_AUTOC_LMS_1G_AN)) {
+ /* Switch from 1G SFI to 10G SFI if requested */
+ if ((speed == IXGBE_LINK_SPEED_10GB_FULL) &&
+ (pma_pmd_10g_serial == IXGBE_AUTOC2_10G_SFI)) {
+ autoc &= ~IXGBE_AUTOC_LMS_MASK;
+ autoc |= IXGBE_AUTOC_LMS_10G_SERIAL;
+ }
+ } else if ((pma_pmd_10g_serial == IXGBE_AUTOC2_10G_SFI) &&
+ (link_mode == IXGBE_AUTOC_LMS_10G_SERIAL)) {
+ /* Switch from 10G SFI to 1G SFI if requested */
+ if ((speed == IXGBE_LINK_SPEED_1GB_FULL) &&
+ (pma_pmd_1g == IXGBE_AUTOC_1G_SFI)) {
+ autoc &= ~IXGBE_AUTOC_LMS_MASK;
+ if (autoneg)
+ autoc |= IXGBE_AUTOC_LMS_1G_AN;
+ else
+ autoc |= IXGBE_AUTOC_LMS_1G_LINK_NO_AN;
+ }
+ }
+
+ if (autoc != start_autoc) {
+ /* Restart link */
+ autoc |= IXGBE_AUTOC_AN_RESTART;
+ IXGBE_WRITE_REG(hw, IXGBE_AUTOC, autoc);
+
+ /* Only poll for autoneg to complete if specified to do so */
+ if (autoneg_wait_to_complete) {
+ if (link_mode == IXGBE_AUTOC_LMS_KX4_KX_KR ||
+ link_mode == IXGBE_AUTOC_LMS_KX4_KX_KR_1G_AN ||
+ link_mode == IXGBE_AUTOC_LMS_KX4_KX_KR_SGMII) {
+ for (i = 0; i < IXGBE_AUTO_NEG_TIME; i++) {
+ links_reg =
+ IXGBE_READ_REG(hw, IXGBE_LINKS);
+ if (links_reg & IXGBE_LINKS_KX_AN_COMP)
+ break;
+ msleep(100);
+ }
+ if (!(links_reg & IXGBE_LINKS_KX_AN_COMP)) {
+ status =
+ IXGBE_ERR_AUTONEG_NOT_COMPLETE;
+ hw_dbg(hw, "Autoneg did not complete.\n");
+ }
+ }
+ }
+
+ /* Add delay to filter out noises during initial link setup */
+ msleep(50);
+ }
+
+out:
+ return status;
+}
+
+/**
+ * ixgbe_setup_copper_link_82599 - Set the PHY autoneg advertised field
+ * @hw: pointer to hardware structure
+ * @speed: new link speed
+ * @autoneg: true if autonegotiation enabled
+ * @autoneg_wait_to_complete: true if waiting is needed to complete
+ *
+ * Restarts link on PHY and MAC based on settings passed in.
+ **/
+static s32 ixgbe_setup_copper_link_82599(struct ixgbe_hw *hw,
+ ixgbe_link_speed speed,
+ bool autoneg,
+ bool autoneg_wait_to_complete)
+{
+ s32 status;
+
+ /* Setup the PHY according to input speed */
+ status = hw->phy.ops.setup_link_speed(hw, speed, autoneg,
+ autoneg_wait_to_complete);
+ /* Set up MAC */
+ ixgbe_start_mac_link_82599(hw, autoneg_wait_to_complete);
+
+ return status;
+}
+
+/**
+ * ixgbe_reset_hw_82599 - Perform hardware reset
+ * @hw: pointer to hardware structure
+ *
+ * Resets the hardware by resetting the transmit and receive units, masks
+ * and clears all interrupts, perform a PHY reset, and perform a link (MAC)
+ * reset.
+ **/
+s32 ixgbe_reset_hw_82599(struct ixgbe_hw *hw)
+{
+// ixgbe_link_speed link_speed;
+ s32 status = 0;
+// u32 ctrl, i, autoc, autoc2;
+// bool link_up = false;
+
+#if 0
+ /* Call adapter stop to disable tx/rx and clear interrupts */
+ status = hw->mac.ops.stop_adapter(hw);
+ if (status != 0)
+ goto reset_hw_out;
+
+ /* flush pending Tx transactions */
+ ixgbe_clear_tx_pending(hw);
+
+ /* PHY ops must be identified and initialized prior to reset */
+
+ /* Identify PHY and related function pointers */
+ status = hw->phy.ops.init(hw);
+
+ if (status == IXGBE_ERR_SFP_NOT_SUPPORTED)
+ goto reset_hw_out;
+
+ /* Setup SFP module if there is one present. */
+ if (hw->phy.sfp_setup_needed) {
+ status = hw->mac.ops.setup_sfp(hw);
+ hw->phy.sfp_setup_needed = false;
+ }
+
+ if (status == IXGBE_ERR_SFP_NOT_SUPPORTED)
+ goto reset_hw_out;
+
+ /* Reset PHY */
+ if (hw->phy.reset_disable == false && hw->phy.ops.reset != NULL)
+ hw->phy.ops.reset(hw);
+
+mac_reset_top:
+ /*
+ * Issue global reset to the MAC. Needs to be SW reset if link is up.
+ * If link reset is used when link is up, it might reset the PHY when
+ * mng is using it. If link is down or the flag to force full link
+ * reset is set, then perform link reset.
+ */
+ ctrl = IXGBE_CTRL_LNK_RST;
+ if (!hw->force_full_reset) {
+ hw->mac.ops.check_link(hw, &link_speed, &link_up, false);
+ if (link_up)
+ ctrl = IXGBE_CTRL_RST;
+ }
+
+ ctrl |= IXGBE_READ_REG(hw, IXGBE_CTRL);
+ IXGBE_WRITE_REG(hw, IXGBE_CTRL, ctrl);
+ IXGBE_WRITE_FLUSH(hw);
+
+ /* Poll for reset bit to self-clear indicating reset is complete */
+ for (i = 0; i < 10; i++) {
+ udelay(1);
+ ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL);
+ if (!(ctrl & IXGBE_CTRL_RST_MASK))
+ break;
+ }
+
+ if (ctrl & IXGBE_CTRL_RST_MASK) {
+ status = IXGBE_ERR_RESET_FAILED;
+ hw_dbg(hw, "Reset polling failed to complete.\n");
+ }
+
+ msleep(50);
+
+ /*
+ * Double resets are required for recovery from certain error
+ * conditions. Between resets, it is necessary to stall to allow time
+ * for any pending HW events to complete.
+ */
+ if (hw->mac.flags & IXGBE_FLAGS_DOUBLE_RESET_REQUIRED) {
+ hw->mac.flags &= ~IXGBE_FLAGS_DOUBLE_RESET_REQUIRED;
+ goto mac_reset_top;
+ }
+
+ /*
+ * Store the original AUTOC/AUTOC2 values if they have not been
+ * stored off yet. Otherwise restore the stored original
+ * values since the reset operation sets back to defaults.
+ */
+ autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC);
+ autoc2 = IXGBE_READ_REG(hw, IXGBE_AUTOC2);
+ if (hw->mac.orig_link_settings_stored == false) {
+ hw->mac.orig_autoc = autoc;
+ hw->mac.orig_autoc2 = autoc2;
+ hw->mac.orig_link_settings_stored = true;
+ } else {
+ if (autoc != hw->mac.orig_autoc)
+ IXGBE_WRITE_REG(hw, IXGBE_AUTOC, (hw->mac.orig_autoc |
+ IXGBE_AUTOC_AN_RESTART));
+
+ if ((autoc2 & IXGBE_AUTOC2_UPPER_MASK) !=
+ (hw->mac.orig_autoc2 & IXGBE_AUTOC2_UPPER_MASK)) {
+ autoc2 &= ~IXGBE_AUTOC2_UPPER_MASK;
+ autoc2 |= (hw->mac.orig_autoc2 &
+ IXGBE_AUTOC2_UPPER_MASK);
+ IXGBE_WRITE_REG(hw, IXGBE_AUTOC2, autoc2);
+ }
+ }
+#endif
+
+ /* Store the permanent mac address */
+ hw->mac.ops.get_mac_addr(hw, hw->mac.perm_addr);
+
+ /*
+ * Store MAC address from RAR0, clear receive address registers, and
+ * clear the multicast table. Also reset num_rar_entries to 128,
+ * since we modify this value when programming the SAN MAC address.
+ */
+ hw->mac.num_rar_entries = 128;
+ hw->mac.ops.init_rx_addrs(hw);
+
+ /* Store the permanent SAN mac address */
+ hw->mac.ops.get_san_mac_addr(hw, hw->mac.san_addr);
+
+ /* Add the SAN MAC address to the RAR only if it's a valid address */
+ if (ixgbe_validate_mac_addr(hw->mac.san_addr) == 0) {
+ hw->mac.ops.set_rar(hw, hw->mac.num_rar_entries - 1,
+ hw->mac.san_addr, 0, IXGBE_RAH_AV);
+
+ /* Save the SAN MAC RAR index */
+ hw->mac.san_mac_rar_index = hw->mac.num_rar_entries - 1;
+
+ /* Reserve the last RAR for the SAN MAC address */
+ hw->mac.num_rar_entries--;
+ }
+
+ /* Store the alternative WWNN/WWPN prefix */
+ hw->mac.ops.get_wwn_prefix(hw, &hw->mac.wwnn_prefix,
+ &hw->mac.wwpn_prefix);
+
+//reset_hw_out:
+ return status;
+}
+
+/**
+ * ixgbe_reinit_fdir_tables_82599 - Reinitialize Flow Director tables.
+ * @hw: pointer to hardware structure
+ **/
+s32 ixgbe_reinit_fdir_tables_82599(struct ixgbe_hw *hw)
+{
+ int i;
+ u32 fdirctrl = IXGBE_READ_REG(hw, IXGBE_FDIRCTRL);
+ fdirctrl &= ~IXGBE_FDIRCTRL_INIT_DONE;
+
+ /*
+ * Before starting reinitialization process,
+ * FDIRCMD.CMD must be zero.
+ */
+ for (i = 0; i < IXGBE_FDIRCMD_CMD_POLL; i++) {
+ if (!(IXGBE_READ_REG(hw, IXGBE_FDIRCMD) &
+ IXGBE_FDIRCMD_CMD_MASK))
+ break;
+ udelay(10);
+ }
+ if (i >= IXGBE_FDIRCMD_CMD_POLL) {
+ hw_dbg(hw, "Flow Director previous command isn't complete, "
+ "aborting table re-initialization.\n");
+ return IXGBE_ERR_FDIR_REINIT_FAILED;
+ }
+
+ IXGBE_WRITE_REG(hw, IXGBE_FDIRFREE, 0);
+ IXGBE_WRITE_FLUSH(hw);
+ /*
+ * 82599 adapters flow director init flow cannot be restarted,
+ * Workaround 82599 silicon errata by performing the following steps
+ * before re-writing the FDIRCTRL control register with the same value.
+ * - write 1 to bit 8 of FDIRCMD register &
+ * - write 0 to bit 8 of FDIRCMD register
+ */
+ IXGBE_WRITE_REG(hw, IXGBE_FDIRCMD,
+ (IXGBE_READ_REG(hw, IXGBE_FDIRCMD) |
+ IXGBE_FDIRCMD_CLEARHT));
+ IXGBE_WRITE_FLUSH(hw);
+ IXGBE_WRITE_REG(hw, IXGBE_FDIRCMD,
+ (IXGBE_READ_REG(hw, IXGBE_FDIRCMD) &
+ ~IXGBE_FDIRCMD_CLEARHT));
+ IXGBE_WRITE_FLUSH(hw);
+ /*
+ * Clear FDIR Hash register to clear any leftover hashes
+ * waiting to be programmed.
+ */
+ IXGBE_WRITE_REG(hw, IXGBE_FDIRHASH, 0x00);
+ IXGBE_WRITE_FLUSH(hw);
+
+ IXGBE_WRITE_REG(hw, IXGBE_FDIRCTRL, fdirctrl);
+ IXGBE_WRITE_FLUSH(hw);
+
+ /* Poll init-done after we write FDIRCTRL register */
+ for (i = 0; i < IXGBE_FDIR_INIT_DONE_POLL; i++) {
+ if (IXGBE_READ_REG(hw, IXGBE_FDIRCTRL) &
+ IXGBE_FDIRCTRL_INIT_DONE)
+ break;
+ udelay(10);
+ }
+ if (i >= IXGBE_FDIR_INIT_DONE_POLL) {
+ hw_dbg(hw, "Flow Director Signature poll time exceeded!\n");
+ return IXGBE_ERR_FDIR_REINIT_FAILED;
+ }
+
+ /* Clear FDIR statistics registers (read to clear) */
+ IXGBE_READ_REG(hw, IXGBE_FDIRUSTAT);
+ IXGBE_READ_REG(hw, IXGBE_FDIRFSTAT);
+ IXGBE_READ_REG(hw, IXGBE_FDIRMATCH);
+ IXGBE_READ_REG(hw, IXGBE_FDIRMISS);
+ IXGBE_READ_REG(hw, IXGBE_FDIRLEN);
+
+ return 0;
+}
+
+/**
+ * ixgbe_fdir_enable_82599 - Initialize Flow Director control registers
+ * @hw: pointer to hardware structure
+ * @fdirctrl: value to write to flow director control register
+ **/
+static void ixgbe_fdir_enable_82599(struct ixgbe_hw *hw, u32 fdirctrl)
+{
+ int i;
+
+ /* Prime the keys for hashing */
+ IXGBE_WRITE_REG(hw, IXGBE_FDIRHKEY, IXGBE_ATR_BUCKET_HASH_KEY);
+ IXGBE_WRITE_REG(hw, IXGBE_FDIRSKEY, IXGBE_ATR_SIGNATURE_HASH_KEY);
+
+ /*
+ * Poll init-done after we write the register. Estimated times:
+ * 10G: PBALLOC = 11b, timing is 60us
+ * 1G: PBALLOC = 11b, timing is 600us
+ * 100M: PBALLOC = 11b, timing is 6ms
+ *
+ * Multiple these timings by 4 if under full Rx load
+ *
+ * So we'll poll for IXGBE_FDIR_INIT_DONE_POLL times, sleeping for
+ * 1 msec per poll time. If we're at line rate and drop to 100M, then
+ * this might not finish in our poll time, but we can live with that
+ * for now.
+ */
+ IXGBE_WRITE_REG(hw, IXGBE_FDIRCTRL, fdirctrl);
+ IXGBE_WRITE_FLUSH(hw);
+ for (i = 0; i < IXGBE_FDIR_INIT_DONE_POLL; i++) {
+ if (IXGBE_READ_REG(hw, IXGBE_FDIRCTRL) &
+ IXGBE_FDIRCTRL_INIT_DONE)
+ break;
+ msleep(1);
+ }
+
+ if (i >= IXGBE_FDIR_INIT_DONE_POLL)
+ hw_dbg(hw, "Flow Director poll time exceeded!\n");
+}
+
+/**
+ * ixgbe_init_fdir_signature_82599 - Initialize Flow Director signature filters
+ * @hw: pointer to hardware structure
+ * @fdirctrl: value to write to flow director control register, initially
+ * contains just the value of the Rx packet buffer allocation
+ **/
+s32 ixgbe_init_fdir_signature_82599(struct ixgbe_hw *hw, u32 fdirctrl)
+{
+ /*
+ * Continue setup of fdirctrl register bits:
+ * Move the flexible bytes to use the ethertype - shift 6 words
+ * Set the maximum length per hash bucket to 0xA filters
+ * Send interrupt when 64 filters are left
+ */
+ fdirctrl |= (0x6 << IXGBE_FDIRCTRL_FLEX_SHIFT) |
+ (0xA << IXGBE_FDIRCTRL_MAX_LENGTH_SHIFT) |
+ (4 << IXGBE_FDIRCTRL_FULL_THRESH_SHIFT);
+
+ /* write hashes and fdirctrl register, poll for completion */
+ ixgbe_fdir_enable_82599(hw, fdirctrl);
+
+ return 0;
+}
+
+/**
+ * ixgbe_init_fdir_perfect_82599 - Initialize Flow Director perfect filters
+ * @hw: pointer to hardware structure
+ * @fdirctrl: value to write to flow director control register, initially
+ * contains just the value of the Rx packet buffer allocation
+ **/
+s32 ixgbe_init_fdir_perfect_82599(struct ixgbe_hw *hw, u32 fdirctrl)
+{
+ /*
+ * Continue setup of fdirctrl register bits:
+ * Turn perfect match filtering on
+ * Report hash in RSS field of Rx wb descriptor
+ * Initialize the drop queue
+ * Move the flexible bytes to use the ethertype - shift 6 words
+ * Set the maximum length per hash bucket to 0xA filters
+ * Send interrupt when 64 (0x4 * 16) filters are left
+ */
+ fdirctrl |= IXGBE_FDIRCTRL_PERFECT_MATCH |
+ IXGBE_FDIRCTRL_REPORT_STATUS |
+ (IXGBE_FDIR_DROP_QUEUE << IXGBE_FDIRCTRL_DROP_Q_SHIFT) |
+ (0x6 << IXGBE_FDIRCTRL_FLEX_SHIFT) |
+ (0xA << IXGBE_FDIRCTRL_MAX_LENGTH_SHIFT) |
+ (4 << IXGBE_FDIRCTRL_FULL_THRESH_SHIFT);
+
+ /* write hashes and fdirctrl register, poll for completion */
+ ixgbe_fdir_enable_82599(hw, fdirctrl);
+
+ return 0;
+}
+
+/*
+ * These defines allow us to quickly generate all of the necessary instructions
+ * in the function below by simply calling out IXGBE_COMPUTE_SIG_HASH_ITERATION
+ * for values 0 through 15
+ */
+#define IXGBE_ATR_COMMON_HASH_KEY \
+ (IXGBE_ATR_BUCKET_HASH_KEY & IXGBE_ATR_SIGNATURE_HASH_KEY)
+#define IXGBE_COMPUTE_SIG_HASH_ITERATION(_n) \
+do { \
+ u32 n = (_n); \
+ if (IXGBE_ATR_COMMON_HASH_KEY & (0x01 << n)) \
+ common_hash ^= lo_hash_dword >> n; \
+ else if (IXGBE_ATR_BUCKET_HASH_KEY & (0x01 << n)) \
+ bucket_hash ^= lo_hash_dword >> n; \
+ else if (IXGBE_ATR_SIGNATURE_HASH_KEY & (0x01 << n)) \
+ sig_hash ^= lo_hash_dword << (16 - n); \
+ if (IXGBE_ATR_COMMON_HASH_KEY & (0x01 << (n + 16))) \
+ common_hash ^= hi_hash_dword >> n; \
+ else if (IXGBE_ATR_BUCKET_HASH_KEY & (0x01 << (n + 16))) \
+ bucket_hash ^= hi_hash_dword >> n; \
+ else if (IXGBE_ATR_SIGNATURE_HASH_KEY & (0x01 << (n + 16))) \
+ sig_hash ^= hi_hash_dword << (16 - n); \
+} while (0);
+
+/**
+ * ixgbe_atr_compute_sig_hash_82599 - Compute the signature hash
+ * @stream: input bitstream to compute the hash on
+ *
+ * This function is almost identical to the function above but contains
+ * several optomizations such as unwinding all of the loops, letting the
+ * compiler work out all of the conditional ifs since the keys are static
+ * defines, and computing two keys at once since the hashed dword stream
+ * will be the same for both keys.
+ **/
+u32 ixgbe_atr_compute_sig_hash_82599(union ixgbe_atr_hash_dword input,
+ union ixgbe_atr_hash_dword common)
+{
+ u32 hi_hash_dword, lo_hash_dword, flow_vm_vlan;
+ u32 sig_hash = 0, bucket_hash = 0, common_hash = 0;
+
+ /* record the flow_vm_vlan bits as they are a key part to the hash */
+ flow_vm_vlan = IXGBE_NTOHL(input.dword);
+
+ /* generate common hash dword */
+ hi_hash_dword = IXGBE_NTOHL(common.dword);
+
+ /* low dword is word swapped version of common */
+ lo_hash_dword = (hi_hash_dword >> 16) | (hi_hash_dword << 16);
+
+ /* apply flow ID/VM pool/VLAN ID bits to hash words */
+ hi_hash_dword ^= flow_vm_vlan ^ (flow_vm_vlan >> 16);
+
+ /* Process bits 0 and 16 */
+ IXGBE_COMPUTE_SIG_HASH_ITERATION(0);
+
+ /*
+ * apply flow ID/VM pool/VLAN ID bits to lo hash dword, we had to
+ * delay this because bit 0 of the stream should not be processed
+ * so we do not add the vlan until after bit 0 was processed
+ */
+ lo_hash_dword ^= flow_vm_vlan ^ (flow_vm_vlan << 16);
+
+ /* Process remaining 30 bit of the key */
+ IXGBE_COMPUTE_SIG_HASH_ITERATION(1);
+ IXGBE_COMPUTE_SIG_HASH_ITERATION(2);
+ IXGBE_COMPUTE_SIG_HASH_ITERATION(3);
+ IXGBE_COMPUTE_SIG_HASH_ITERATION(4);
+ IXGBE_COMPUTE_SIG_HASH_ITERATION(5);
+ IXGBE_COMPUTE_SIG_HASH_ITERATION(6);
+ IXGBE_COMPUTE_SIG_HASH_ITERATION(7);
+ IXGBE_COMPUTE_SIG_HASH_ITERATION(8);
+ IXGBE_COMPUTE_SIG_HASH_ITERATION(9);
+ IXGBE_COMPUTE_SIG_HASH_ITERATION(10);
+ IXGBE_COMPUTE_SIG_HASH_ITERATION(11);
+ IXGBE_COMPUTE_SIG_HASH_ITERATION(12);
+ IXGBE_COMPUTE_SIG_HASH_ITERATION(13);
+ IXGBE_COMPUTE_SIG_HASH_ITERATION(14);
+ IXGBE_COMPUTE_SIG_HASH_ITERATION(15);
+
+ /* combine common_hash result with signature and bucket hashes */
+ bucket_hash ^= common_hash;
+ bucket_hash &= IXGBE_ATR_HASH_MASK;
+
+ sig_hash ^= common_hash << 16;
+ sig_hash &= IXGBE_ATR_HASH_MASK << 16;
+
+ /* return completed signature hash */
+ return sig_hash ^ bucket_hash;
+}
+
+/**
+ * ixgbe_atr_add_signature_filter_82599 - Adds a signature hash filter
+ * @hw: pointer to hardware structure
+ * @input: unique input dword
+ * @common: compressed common input dword
+ * @queue: queue index to direct traffic to
+ **/
+s32 ixgbe_fdir_add_signature_filter_82599(struct ixgbe_hw *hw,
+ union ixgbe_atr_hash_dword input,
+ union ixgbe_atr_hash_dword common,
+ u8 queue)
+{
+ u64 fdirhashcmd;
+ u32 fdircmd;
+
+ /*
+ * Get the flow_type in order to program FDIRCMD properly
+ * lowest 2 bits are FDIRCMD.L4TYPE, third lowest bit is FDIRCMD.IPV6
+ */
+ switch (input.formatted.flow_type) {
+ case IXGBE_ATR_FLOW_TYPE_TCPV4:
+ case IXGBE_ATR_FLOW_TYPE_UDPV4:
+ case IXGBE_ATR_FLOW_TYPE_SCTPV4:
+ case IXGBE_ATR_FLOW_TYPE_TCPV6:
+ case IXGBE_ATR_FLOW_TYPE_UDPV6:
+ case IXGBE_ATR_FLOW_TYPE_SCTPV6:
+ break;
+ default:
+ hw_dbg(hw, " Error on flow type input\n");
+ return IXGBE_ERR_CONFIG;
+ }
+
+ /* configure FDIRCMD register */
+ fdircmd = IXGBE_FDIRCMD_CMD_ADD_FLOW | IXGBE_FDIRCMD_FILTER_UPDATE |
+ IXGBE_FDIRCMD_LAST | IXGBE_FDIRCMD_QUEUE_EN;
+ fdircmd |= input.formatted.flow_type << IXGBE_FDIRCMD_FLOW_TYPE_SHIFT;
+ fdircmd |= (u32)queue << IXGBE_FDIRCMD_RX_QUEUE_SHIFT;
+
+ /*
+ * The lower 32-bits of fdirhashcmd is for FDIRHASH, the upper 32-bits
+ * is for FDIRCMD. Then do a 64-bit register write from FDIRHASH.
+ */
+ fdirhashcmd = (u64)fdircmd << 32;
+ fdirhashcmd |= ixgbe_atr_compute_sig_hash_82599(input, common);
+ IXGBE_WRITE_REG64(hw, IXGBE_FDIRHASH, fdirhashcmd);
+
+ hw_dbg(hw, "Tx Queue=%x hash=%x\n", queue, (u32)fdirhashcmd);
+
+ return 0;
+}
+
+#define IXGBE_COMPUTE_BKT_HASH_ITERATION(_n) \
+do { \
+ u32 n = (_n); \
+ if (IXGBE_ATR_BUCKET_HASH_KEY & (0x01 << n)) \
+ bucket_hash ^= lo_hash_dword >> n; \
+ if (IXGBE_ATR_BUCKET_HASH_KEY & (0x01 << (n + 16))) \
+ bucket_hash ^= hi_hash_dword >> n; \
+} while (0);
+
+/**
+ * ixgbe_atr_compute_perfect_hash_82599 - Compute the perfect filter hash
+ * @atr_input: input bitstream to compute the hash on
+ * @input_mask: mask for the input bitstream
+ *
+ * This function serves two main purposes. First it applys the input_mask
+ * to the atr_input resulting in a cleaned up atr_input data stream.
+ * Secondly it computes the hash and stores it in the bkt_hash field at
+ * the end of the input byte stream. This way it will be available for
+ * future use without needing to recompute the hash.
+ **/
+void ixgbe_atr_compute_perfect_hash_82599(union ixgbe_atr_input *input,
+ union ixgbe_atr_input *input_mask)
+{
+
+ u32 hi_hash_dword, lo_hash_dword, flow_vm_vlan;
+ u32 bucket_hash = 0;
+
+ /* Apply masks to input data */
+ input->dword_stream[0] &= input_mask->dword_stream[0];
+ input->dword_stream[1] &= input_mask->dword_stream[1];
+ input->dword_stream[2] &= input_mask->dword_stream[2];
+ input->dword_stream[3] &= input_mask->dword_stream[3];
+ input->dword_stream[4] &= input_mask->dword_stream[4];
+ input->dword_stream[5] &= input_mask->dword_stream[5];
+ input->dword_stream[6] &= input_mask->dword_stream[6];
+ input->dword_stream[7] &= input_mask->dword_stream[7];
+ input->dword_stream[8] &= input_mask->dword_stream[8];
+ input->dword_stream[9] &= input_mask->dword_stream[9];
+ input->dword_stream[10] &= input_mask->dword_stream[10];
+
+ /* record the flow_vm_vlan bits as they are a key part to the hash */
+ flow_vm_vlan = IXGBE_NTOHL(input->dword_stream[0]);
+
+ /* generate common hash dword */
+ hi_hash_dword = IXGBE_NTOHL(input->dword_stream[1] ^
+ input->dword_stream[2] ^
+ input->dword_stream[3] ^
+ input->dword_stream[4] ^
+ input->dword_stream[5] ^
+ input->dword_stream[6] ^
+ input->dword_stream[7] ^
+ input->dword_stream[8] ^
+ input->dword_stream[9] ^
+ input->dword_stream[10]);
+
+ /* low dword is word swapped version of common */
+ lo_hash_dword = (hi_hash_dword >> 16) | (hi_hash_dword << 16);
+
+ /* apply flow ID/VM pool/VLAN ID bits to hash words */
+ hi_hash_dword ^= flow_vm_vlan ^ (flow_vm_vlan >> 16);
+
+ /* Process bits 0 and 16 */
+ IXGBE_COMPUTE_BKT_HASH_ITERATION(0);
+
+ /*
+ * apply flow ID/VM pool/VLAN ID bits to lo hash dword, we had to
+ * delay this because bit 0 of the stream should not be processed
+ * so we do not add the vlan until after bit 0 was processed
+ */
+ lo_hash_dword ^= flow_vm_vlan ^ (flow_vm_vlan << 16);
+
+ /* Process remaining 30 bit of the key */
+ IXGBE_COMPUTE_BKT_HASH_ITERATION(1);
+ IXGBE_COMPUTE_BKT_HASH_ITERATION(2);
+ IXGBE_COMPUTE_BKT_HASH_ITERATION(3);
+ IXGBE_COMPUTE_BKT_HASH_ITERATION(4);
+ IXGBE_COMPUTE_BKT_HASH_ITERATION(5);
+ IXGBE_COMPUTE_BKT_HASH_ITERATION(6);
+ IXGBE_COMPUTE_BKT_HASH_ITERATION(7);
+ IXGBE_COMPUTE_BKT_HASH_ITERATION(8);
+ IXGBE_COMPUTE_BKT_HASH_ITERATION(9);
+ IXGBE_COMPUTE_BKT_HASH_ITERATION(10);
+ IXGBE_COMPUTE_BKT_HASH_ITERATION(11);
+ IXGBE_COMPUTE_BKT_HASH_ITERATION(12);
+ IXGBE_COMPUTE_BKT_HASH_ITERATION(13);
+ IXGBE_COMPUTE_BKT_HASH_ITERATION(14);
+ IXGBE_COMPUTE_BKT_HASH_ITERATION(15);
+
+ /*
+ * Limit hash to 13 bits since max bucket count is 8K.
+ * Store result at the end of the input stream.
+ */
+ input->formatted.bkt_hash = bucket_hash & 0x1FFF;
+}
+
+/**
+ * ixgbe_get_fdirtcpm_82599 - generate a tcp port from atr_input_masks
+ * @input_mask: mask to be bit swapped
+ *
+ * The source and destination port masks for flow director are bit swapped
+ * in that bit 15 effects bit 0, 14 effects 1, 13, 2 etc. In order to
+ * generate a correctly swapped value we need to bit swap the mask and that
+ * is what is accomplished by this function.
+ **/
+static u32 ixgbe_get_fdirtcpm_82599(union ixgbe_atr_input *input_mask)
+{
+ u32 mask = IXGBE_NTOHS(input_mask->formatted.dst_port);
+ mask <<= IXGBE_FDIRTCPM_DPORTM_SHIFT;
+ mask |= IXGBE_NTOHS(input_mask->formatted.src_port);
+ mask = ((mask & 0x55555555) << 1) | ((mask & 0xAAAAAAAA) >> 1);
+ mask = ((mask & 0x33333333) << 2) | ((mask & 0xCCCCCCCC) >> 2);
+ mask = ((mask & 0x0F0F0F0F) << 4) | ((mask & 0xF0F0F0F0) >> 4);
+ return ((mask & 0x00FF00FF) << 8) | ((mask & 0xFF00FF00) >> 8);
+}
+
+/*
+ * These two macros are meant to address the fact that we have registers
+ * that are either all or in part big-endian. As a result on big-endian
+ * systems we will end up byte swapping the value to little-endian before
+ * it is byte swapped again and written to the hardware in the original
+ * big-endian format.
+ */
+#define IXGBE_STORE_AS_BE32(_value) \
+ (((u32)(_value) >> 24) | (((u32)(_value) & 0x00FF0000) >> 8) | \
+ (((u32)(_value) & 0x0000FF00) << 8) | ((u32)(_value) << 24))
+
+#define IXGBE_WRITE_REG_BE32(a, reg, value) \
+ IXGBE_WRITE_REG((a), (reg), IXGBE_STORE_AS_BE32(IXGBE_NTOHL(value)))
+
+#define IXGBE_STORE_AS_BE16(_value) \
+ IXGBE_NTOHS(((u16)(_value) >> 8) | ((u16)(_value) << 8))
+
+s32 ixgbe_fdir_set_input_mask_82599(struct ixgbe_hw *hw,
+ union ixgbe_atr_input *input_mask)
+{
+ /* mask IPv6 since it is currently not supported */
+ u32 fdirm = IXGBE_FDIRM_DIPv6;
+ u32 fdirtcpm;
+
+ /*
+ * Program the relevant mask registers. If src/dst_port or src/dst_addr
+ * are zero, then assume a full mask for that field. Also assume that
+ * a VLAN of 0 is unspecified, so mask that out as well. L4type
+ * cannot be masked out in this implementation.
+ *
+ * This also assumes IPv4 only. IPv6 masking isn't supported at this
+ * point in time.
+ */
+
+ /* verify bucket hash is cleared on hash generation */
+ if (input_mask->formatted.bkt_hash)
+ hw_dbg(hw, " bucket hash should always be 0 in mask\n");
+
+ /* Program FDIRM and verify partial masks */
+ switch (input_mask->formatted.vm_pool & 0x7F) {
+ case 0x0:
+ fdirm |= IXGBE_FDIRM_POOL;
+ case 0x7F:
+ break;
+ default:
+ hw_dbg(hw, " Error on vm pool mask\n");
+ return IXGBE_ERR_CONFIG;
+ }
+
+ switch (input_mask->formatted.flow_type & IXGBE_ATR_L4TYPE_MASK) {
+ case 0x0:
+ fdirm |= IXGBE_FDIRM_L4P;
+ if (input_mask->formatted.dst_port ||
+ input_mask->formatted.src_port) {
+ hw_dbg(hw, " Error on src/dst port mask\n");
+ return IXGBE_ERR_CONFIG;
+ }
+ case IXGBE_ATR_L4TYPE_MASK:
+ break;
+ default:
+ hw_dbg(hw, " Error on flow type mask\n");
+ return IXGBE_ERR_CONFIG;
+ }
+
+ switch (IXGBE_NTOHS(input_mask->formatted.vlan_id) & 0xEFFF) {
+ case 0x0000:
+ /* mask VLAN ID, fall through to mask VLAN priority */
+ fdirm |= IXGBE_FDIRM_VLANID;
+ case 0x0FFF:
+ /* mask VLAN priority */
+ fdirm |= IXGBE_FDIRM_VLANP;
+ break;
+ case 0xE000:
+ /* mask VLAN ID only, fall through */
+ fdirm |= IXGBE_FDIRM_VLANID;
+ case 0xEFFF:
+ /* no VLAN fields masked */
+ break;
+ default:
+ hw_dbg(hw, " Error on VLAN mask\n");
+ return IXGBE_ERR_CONFIG;
+ }
+
+ switch (input_mask->formatted.flex_bytes & 0xFFFF) {
+ case 0x0000:
+ /* Mask Flex Bytes, fall through */
+ fdirm |= IXGBE_FDIRM_FLEX;
+ case 0xFFFF:
+ break;
+ default:
+ hw_dbg(hw, " Error on flexible byte mask\n");
+ return IXGBE_ERR_CONFIG;
+ }
+
+ /* Now mask VM pool and destination IPv6 - bits 5 and 2 */
+ IXGBE_WRITE_REG(hw, IXGBE_FDIRM, fdirm);
+
+ /* store the TCP/UDP port masks, bit reversed from port layout */
+ fdirtcpm = ixgbe_get_fdirtcpm_82599(input_mask);
+
+ /* write both the same so that UDP and TCP use the same mask */
+ IXGBE_WRITE_REG(hw, IXGBE_FDIRTCPM, ~fdirtcpm);
+ IXGBE_WRITE_REG(hw, IXGBE_FDIRUDPM, ~fdirtcpm);
+
+ /* store source and destination IP masks (big-enian) */
+ IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRSIP4M,
+ ~input_mask->formatted.src_ip[0]);
+ IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRDIP4M,
+ ~input_mask->formatted.dst_ip[0]);
+
+ return 0;
+}
+
+s32 ixgbe_fdir_write_perfect_filter_82599(struct ixgbe_hw *hw,
+ union ixgbe_atr_input *input,
+ u16 soft_id, u8 queue)
+{
+ u32 fdirport, fdirvlan, fdirhash, fdircmd;
+
+ /* currently IPv6 is not supported, must be programmed with 0 */
+ IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRSIPv6(0),
+ input->formatted.src_ip[0]);
+ IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRSIPv6(1),
+ input->formatted.src_ip[1]);
+ IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRSIPv6(2),
+ input->formatted.src_ip[2]);
+
+ /* record the source address (big-endian) */
+ IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRIPSA, input->formatted.src_ip[0]);
+
+ /* record the first 32 bits of the destination address (big-endian) */
+ IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRIPDA, input->formatted.dst_ip[0]);
+
+ /* record source and destination port (little-endian)*/
+ fdirport = IXGBE_NTOHS(input->formatted.dst_port);
+ fdirport <<= IXGBE_FDIRPORT_DESTINATION_SHIFT;
+ fdirport |= IXGBE_NTOHS(input->formatted.src_port);
+ IXGBE_WRITE_REG(hw, IXGBE_FDIRPORT, fdirport);
+
+ /* record vlan (little-endian) and flex_bytes(big-endian) */
+ fdirvlan = IXGBE_STORE_AS_BE16(input->formatted.flex_bytes);
+ fdirvlan <<= IXGBE_FDIRVLAN_FLEX_SHIFT;
+ fdirvlan |= IXGBE_NTOHS(input->formatted.vlan_id);
+ IXGBE_WRITE_REG(hw, IXGBE_FDIRVLAN, fdirvlan);
+
+ /* configure FDIRHASH register */
+ fdirhash = input->formatted.bkt_hash;
+ fdirhash |= soft_id << IXGBE_FDIRHASH_SIG_SW_INDEX_SHIFT;
+ IXGBE_WRITE_REG(hw, IXGBE_FDIRHASH, fdirhash);
+
+ /*
+ * flush all previous writes to make certain registers are
+ * programmed prior to issuing the command
+ */
+ IXGBE_WRITE_FLUSH(hw);
+
+ /* configure FDIRCMD register */
+ fdircmd = IXGBE_FDIRCMD_CMD_ADD_FLOW | IXGBE_FDIRCMD_FILTER_UPDATE |
+ IXGBE_FDIRCMD_LAST | IXGBE_FDIRCMD_QUEUE_EN;
+ if (queue == IXGBE_FDIR_DROP_QUEUE)
+ fdircmd |= IXGBE_FDIRCMD_DROP;
+ fdircmd |= input->formatted.flow_type << IXGBE_FDIRCMD_FLOW_TYPE_SHIFT;
+ fdircmd |= (u32)queue << IXGBE_FDIRCMD_RX_QUEUE_SHIFT;
+ fdircmd |= (u32)input->formatted.vm_pool << IXGBE_FDIRCMD_VT_POOL_SHIFT;
+
+ IXGBE_WRITE_REG(hw, IXGBE_FDIRCMD, fdircmd);
+
+ return 0;
+}
+
+s32 ixgbe_fdir_erase_perfect_filter_82599(struct ixgbe_hw *hw,
+ union ixgbe_atr_input *input,
+ u16 soft_id)
+{
+ u32 fdirhash;
+ u32 fdircmd = 0;
+ u32 retry_count;
+ s32 err = 0;
+
+ /* configure FDIRHASH register */
+ fdirhash = input->formatted.bkt_hash;
+ fdirhash |= soft_id << IXGBE_FDIRHASH_SIG_SW_INDEX_SHIFT;
+ IXGBE_WRITE_REG(hw, IXGBE_FDIRHASH, fdirhash);
+
+ /* flush hash to HW */
+ IXGBE_WRITE_FLUSH(hw);
+
+ /* Query if filter is present */
+ IXGBE_WRITE_REG(hw, IXGBE_FDIRCMD, IXGBE_FDIRCMD_CMD_QUERY_REM_FILT);
+
+ for (retry_count = 10; retry_count; retry_count--) {
+ /* allow 10us for query to process */
+ udelay(10);
+ /* verify query completed successfully */
+ fdircmd = IXGBE_READ_REG(hw, IXGBE_FDIRCMD);
+ if (!(fdircmd & IXGBE_FDIRCMD_CMD_MASK))
+ break;
+ }
+
+ if (!retry_count)
+ err = IXGBE_ERR_FDIR_REINIT_FAILED;
+
+ /* if filter exists in hardware then remove it */
+ if (fdircmd & IXGBE_FDIRCMD_FILTER_VALID) {
+ IXGBE_WRITE_REG(hw, IXGBE_FDIRHASH, fdirhash);
+ IXGBE_WRITE_FLUSH(hw);
+ IXGBE_WRITE_REG(hw, IXGBE_FDIRCMD,
+ IXGBE_FDIRCMD_CMD_REMOVE_FLOW);
+ }
+
+ return err;
+}
+
+/**
+ * ixgbe_fdir_add_perfect_filter_82599 - Adds a perfect filter
+ * @hw: pointer to hardware structure
+ * @input: input bitstream
+ * @input_mask: mask for the input bitstream
+ * @soft_id: software index for the filters
+ * @queue: queue index to direct traffic to
+ *
+ * Note that the caller to this function must lock before calling, since the
+ * hardware writes must be protected from one another.
+ **/
+s32 ixgbe_fdir_add_perfect_filter_82599(struct ixgbe_hw *hw,
+ union ixgbe_atr_input *input,
+ union ixgbe_atr_input *input_mask,
+ u16 soft_id, u8 queue)
+{
+ s32 err = IXGBE_ERR_CONFIG;
+
+ /*
+ * Check flow_type formatting, and bail out before we touch the hardware
+ * if there's a configuration issue
+ */
+ switch (input->formatted.flow_type) {
+ case IXGBE_ATR_FLOW_TYPE_IPV4:
+ input_mask->formatted.flow_type = IXGBE_ATR_L4TYPE_IPV6_MASK;
+ if (input->formatted.dst_port || input->formatted.src_port) {
+ hw_dbg(hw, " Error on src/dst port\n");
+ return IXGBE_ERR_CONFIG;
+ }
+ break;
+ case IXGBE_ATR_FLOW_TYPE_SCTPV4:
+ if (input->formatted.dst_port || input->formatted.src_port) {
+ hw_dbg(hw, " Error on src/dst port\n");
+ return IXGBE_ERR_CONFIG;
+ }
+ case IXGBE_ATR_FLOW_TYPE_TCPV4:
+ case IXGBE_ATR_FLOW_TYPE_UDPV4:
+ input_mask->formatted.flow_type = IXGBE_ATR_L4TYPE_IPV6_MASK |
+ IXGBE_ATR_L4TYPE_MASK;
+ break;
+ default:
+ hw_dbg(hw, " Error on flow type input\n");
+ return err;
+ }
+
+ /* program input mask into the HW */
+ err = ixgbe_fdir_set_input_mask_82599(hw, input_mask);
+ if (err)
+ return err;
+
+ /* apply mask and compute/store hash */
+ ixgbe_atr_compute_perfect_hash_82599(input, input_mask);
+
+ /* program filters to filter memory */
+ return ixgbe_fdir_write_perfect_filter_82599(hw, input,
+ soft_id, queue);
+}
+
+/**
+ * ixgbe_read_analog_reg8_82599 - Reads 8 bit Omer analog register
+ * @hw: pointer to hardware structure
+ * @reg: analog register to read
+ * @val: read value
+ *
+ * Performs read operation to Omer analog register specified.
+ **/
+s32 ixgbe_read_analog_reg8_82599(struct ixgbe_hw *hw, u32 reg, u8 *val)
+{
+ u32 core_ctl;
+
+ IXGBE_WRITE_REG(hw, IXGBE_CORECTL, IXGBE_CORECTL_WRITE_CMD |
+ (reg << 8));
+ IXGBE_WRITE_FLUSH(hw);
+ udelay(10);
+ core_ctl = IXGBE_READ_REG(hw, IXGBE_CORECTL);
+ *val = (u8)core_ctl;
+
+ return 0;
+}
+
+/**
+ * ixgbe_write_analog_reg8_82599 - Writes 8 bit Omer analog register
+ * @hw: pointer to hardware structure
+ * @reg: atlas register to write
+ * @val: value to write
+ *
+ * Performs write operation to Omer analog register specified.
+ **/
+s32 ixgbe_write_analog_reg8_82599(struct ixgbe_hw *hw, u32 reg, u8 val)
+{
+ u32 core_ctl;
+
+ core_ctl = (reg << 8) | val;
+ IXGBE_WRITE_REG(hw, IXGBE_CORECTL, core_ctl);
+ IXGBE_WRITE_FLUSH(hw);
+ udelay(10);
+
+ return 0;
+}
+
+/**
+ * ixgbe_start_hw_82599 - Prepare hardware for Tx/Rx
+ * @hw: pointer to hardware structure
+ *
+ * Starts the hardware using the generic start_hw function
+ * and the generation start_hw function.
+ * Then performs revision-specific operations, if any.
+ **/
+s32 ixgbe_start_hw_82599(struct ixgbe_hw *hw)
+{
+ s32 ret_val = 0;
+
+ ret_val = ixgbe_start_hw_generic(hw);
+ if (ret_val != 0)
+ goto out;
+
+ ret_val = ixgbe_start_hw_gen2(hw);
+ if (ret_val != 0)
+ goto out;
+
+ /* We need to run link autotry after the driver loads */
+ hw->mac.autotry_restart = true;
+
+ if (ret_val == 0)
+ ret_val = ixgbe_verify_fw_version_82599(hw);
+out:
+ return ret_val;
+}
+
+/**
+ * ixgbe_identify_phy_82599 - Get physical layer module
+ * @hw: pointer to hardware structure
+ *
+ * Determines the physical layer module found on the current adapter.
+ * If PHY already detected, maintains current PHY type in hw struct,
+ * otherwise executes the PHY detection routine.
+ **/
+s32 ixgbe_identify_phy_82599(struct ixgbe_hw *hw)
+{
+ s32 status = IXGBE_ERR_PHY_ADDR_INVALID;
+
+ /* Detect PHY if not unknown - returns success if already detected. */
+ status = ixgbe_identify_phy_generic(hw);
+ if (status != 0) {
+ /* 82599 10GBASE-T requires an external PHY */
+ if (hw->mac.ops.get_media_type(hw) == ixgbe_media_type_copper)
+ goto out;
+ else
+ status = ixgbe_identify_module_generic(hw);
+ }
+
+ /* Set PHY type none if no PHY detected */
+ if (hw->phy.type == ixgbe_phy_unknown) {
+ hw->phy.type = ixgbe_phy_none;
+ status = 0;
+ }
+
+ /* Return error if SFP module has been detected but is not supported */
+ if (hw->phy.type == ixgbe_phy_sfp_unsupported)
+ status = IXGBE_ERR_SFP_NOT_SUPPORTED;
+
+out:
+ return status;
+}
+
+/**
+ * ixgbe_get_supported_physical_layer_82599 - Returns physical layer type
+ * @hw: pointer to hardware structure
+ *
+ * Determines physical layer capabilities of the current configuration.
+ **/
+u32 ixgbe_get_supported_physical_layer_82599(struct ixgbe_hw *hw)
+{
+ u32 physical_layer = IXGBE_PHYSICAL_LAYER_UNKNOWN;
+ u32 autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC);
+ u32 autoc2 = IXGBE_READ_REG(hw, IXGBE_AUTOC2);
+ u32 pma_pmd_10g_serial = autoc2 & IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_MASK;
+ u32 pma_pmd_10g_parallel = autoc & IXGBE_AUTOC_10G_PMA_PMD_MASK;
+ u32 pma_pmd_1g = autoc & IXGBE_AUTOC_1G_PMA_PMD_MASK;
+ u16 ext_ability = 0;
+ u8 comp_codes_10g = 0;
+ u8 comp_codes_1g = 0;
+
+ hw->phy.ops.identify(hw);
+
+ switch (hw->phy.type) {
+ case ixgbe_phy_tn:
+ case ixgbe_phy_cu_unknown:
+ hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_EXT_ABILITY,
+ IXGBE_MDIO_PMA_PMD_DEV_TYPE, &ext_ability);
+ if (ext_ability & IXGBE_MDIO_PHY_10GBASET_ABILITY)
+ physical_layer |= IXGBE_PHYSICAL_LAYER_10GBASE_T;
+ if (ext_ability & IXGBE_MDIO_PHY_1000BASET_ABILITY)
+ physical_layer |= IXGBE_PHYSICAL_LAYER_1000BASE_T;
+ if (ext_ability & IXGBE_MDIO_PHY_100BASETX_ABILITY)
+ physical_layer |= IXGBE_PHYSICAL_LAYER_100BASE_TX;
+ goto out;
+ default:
+ break;
+ }
+
+ switch (autoc & IXGBE_AUTOC_LMS_MASK) {
+ case IXGBE_AUTOC_LMS_1G_AN:
+ case IXGBE_AUTOC_LMS_1G_LINK_NO_AN:
+ if (pma_pmd_1g == IXGBE_AUTOC_1G_KX_BX) {
+ physical_layer = IXGBE_PHYSICAL_LAYER_1000BASE_KX |
+ IXGBE_PHYSICAL_LAYER_1000BASE_BX;
+ goto out;
+ } else
+ /* SFI mode so read SFP module */
+ goto sfp_check;
+ break;
+ case IXGBE_AUTOC_LMS_10G_LINK_NO_AN:
+ if (pma_pmd_10g_parallel == IXGBE_AUTOC_10G_CX4)
+ physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_CX4;
+ else if (pma_pmd_10g_parallel == IXGBE_AUTOC_10G_KX4)
+ physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_KX4;
+ else if (pma_pmd_10g_parallel == IXGBE_AUTOC_10G_XAUI)
+ physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_XAUI;
+ goto out;
+ break;
+ case IXGBE_AUTOC_LMS_10G_SERIAL:
+ if (pma_pmd_10g_serial == IXGBE_AUTOC2_10G_KR) {
+ physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_KR;
+ goto out;
+ } else if (pma_pmd_10g_serial == IXGBE_AUTOC2_10G_SFI)
+ goto sfp_check;
+ break;
+ case IXGBE_AUTOC_LMS_KX4_KX_KR:
+ case IXGBE_AUTOC_LMS_KX4_KX_KR_1G_AN:
+ if (autoc & IXGBE_AUTOC_KX_SUPP)
+ physical_layer |= IXGBE_PHYSICAL_LAYER_1000BASE_KX;
+ if (autoc & IXGBE_AUTOC_KX4_SUPP)
+ physical_layer |= IXGBE_PHYSICAL_LAYER_10GBASE_KX4;
+ if (autoc & IXGBE_AUTOC_KR_SUPP)
+ physical_layer |= IXGBE_PHYSICAL_LAYER_10GBASE_KR;
+ goto out;
+ break;
+ default:
+ goto out;
+ break;
+ }
+
+sfp_check:
+ /* SFP check must be done last since DA modules are sometimes used to
+ * test KR mode - we need to id KR mode correctly before SFP module.
+ * Call identify_sfp because the pluggable module may have changed */
+ hw->phy.ops.identify_sfp(hw);
+ if (hw->phy.sfp_type == ixgbe_sfp_type_not_present)
+ goto out;
+
+ switch (hw->phy.type) {
+ case ixgbe_phy_sfp_passive_tyco:
+ case ixgbe_phy_sfp_passive_unknown:
+ physical_layer = IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU;
+ break;
+ case ixgbe_phy_sfp_ftl_active:
+ case ixgbe_phy_sfp_active_unknown:
+ physical_layer = IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA;
+ break;
+ case ixgbe_phy_sfp_avago:
+ case ixgbe_phy_sfp_ftl:
+ case ixgbe_phy_sfp_intel:
+ case ixgbe_phy_sfp_unknown:
+ hw->phy.ops.read_i2c_eeprom(hw,
+ IXGBE_SFF_1GBE_COMP_CODES, &comp_codes_1g);
+ hw->phy.ops.read_i2c_eeprom(hw,
+ IXGBE_SFF_10GBE_COMP_CODES, &comp_codes_10g);
+ if (comp_codes_10g & IXGBE_SFF_10GBASESR_CAPABLE)
+ physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_SR;
+ else if (comp_codes_10g & IXGBE_SFF_10GBASELR_CAPABLE)
+ physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_LR;
+ else if (comp_codes_1g & IXGBE_SFF_1GBASET_CAPABLE)
+ physical_layer = IXGBE_PHYSICAL_LAYER_1000BASE_T;
+ else if (comp_codes_1g & IXGBE_SFF_1GBASESX_CAPABLE)
+ physical_layer = IXGBE_PHYSICAL_LAYER_1000BASE_SX;
+ break;
+ default:
+ break;
+ }
+
+out:
+ return physical_layer;
+}
+
+/**
+ * ixgbe_enable_rx_dma_82599 - Enable the Rx DMA unit on 82599
+ * @hw: pointer to hardware structure
+ * @regval: register value to write to RXCTRL
+ *
+ * Enables the Rx DMA unit for 82599
+ **/
+s32 ixgbe_enable_rx_dma_82599(struct ixgbe_hw *hw, u32 regval)
+{
+
+ /*
+ * Workaround for 82599 silicon errata when enabling the Rx datapath.
+ * If traffic is incoming before we enable the Rx unit, it could hang
+ * the Rx DMA unit. Therefore, make sure the security engine is
+ * completely disabled prior to enabling the Rx unit.
+ */
+
+ hw->mac.ops.disable_sec_rx_path(hw);
+
+ IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, regval);
+
+ hw->mac.ops.enable_sec_rx_path(hw);
+
+ return 0;
+}
+
+/**
+ * ixgbe_verify_fw_version_82599 - verify fw version for 82599
+ * @hw: pointer to hardware structure
+ *
+ * Verifies that installed the firmware version is 0.6 or higher
+ * for SFI devices. All 82599 SFI devices should have version 0.6 or higher.
+ *
+ * Returns IXGBE_ERR_EEPROM_VERSION if the FW is not present or
+ * if the FW version is not supported.
+ **/
+static s32 ixgbe_verify_fw_version_82599(struct ixgbe_hw *hw)
+{
+ s32 status = IXGBE_ERR_EEPROM_VERSION;
+ u16 fw_offset, fw_ptp_cfg_offset;
+ u16 fw_version = 0;
+
+ /* firmware check is only necessary for SFI devices */
+ if (hw->phy.media_type != ixgbe_media_type_fiber) {
+ status = 0;
+ goto fw_version_out;
+ }
+
+ /* get the offset to the Firmware Module block */
+ hw->eeprom.ops.read(hw, IXGBE_FW_PTR, &fw_offset);
+
+ if ((fw_offset == 0) || (fw_offset == 0xFFFF))
+ goto fw_version_out;
+
+ /* get the offset to the Pass Through Patch Configuration block */
+ hw->eeprom.ops.read(hw, (fw_offset +
+ IXGBE_FW_PASSTHROUGH_PATCH_CONFIG_PTR),
+ &fw_ptp_cfg_offset);
+
+ if ((fw_ptp_cfg_offset == 0) || (fw_ptp_cfg_offset == 0xFFFF))
+ goto fw_version_out;
+
+ /* get the firmware version */
+ hw->eeprom.ops.read(hw, (fw_ptp_cfg_offset +
+ IXGBE_FW_PATCH_VERSION_4), &fw_version);
+
+ if (fw_version > 0x5)
+ status = 0;
+
+fw_version_out:
+ return status;
+}
+
+/**
+ * ixgbe_verify_lesm_fw_enabled_82599 - Checks LESM FW module state.
+ * @hw: pointer to hardware structure
+ *
+ * Returns true if the LESM FW module is present and enabled. Otherwise
+ * returns false. Smart Speed must be disabled if LESM FW module is enabled.
+ **/
+bool ixgbe_verify_lesm_fw_enabled_82599(struct ixgbe_hw *hw)
+{
+ bool lesm_enabled = false;
+ u16 fw_offset, fw_lesm_param_offset, fw_lesm_state;
+ s32 status;
+
+ /* get the offset to the Firmware Module block */
+ status = hw->eeprom.ops.read(hw, IXGBE_FW_PTR, &fw_offset);
+
+ if ((status != 0) ||
+ (fw_offset == 0) || (fw_offset == 0xFFFF))
+ goto out;
+
+ /* get the offset to the LESM Parameters block */
+ status = hw->eeprom.ops.read(hw, (fw_offset +
+ IXGBE_FW_LESM_PARAMETERS_PTR),
+ &fw_lesm_param_offset);
+
+ if ((status != 0) ||
+ (fw_lesm_param_offset == 0) || (fw_lesm_param_offset == 0xFFFF))
+ goto out;
+
+ /* get the lesm state word */
+ status = hw->eeprom.ops.read(hw, (fw_lesm_param_offset +
+ IXGBE_FW_LESM_STATE_1),
+ &fw_lesm_state);
+
+ if ((status == 0) &&
+ (fw_lesm_state & IXGBE_FW_LESM_STATE_ENABLED))
+ lesm_enabled = true;
+
+out:
+ return lesm_enabled;
+}
+
+/**
+ * ixgbe_read_eeprom_buffer_82599 - Read EEPROM word(s) using
+ * fastest available method
+ *
+ * @hw: pointer to hardware structure
+ * @offset: offset of word in EEPROM to read
+ * @words: number of words
+ * @data: word(s) read from the EEPROM
+ *
+ * Retrieves 16 bit word(s) read from EEPROM
+ **/
+static s32 ixgbe_read_eeprom_buffer_82599(struct ixgbe_hw *hw, u16 offset,
+ u16 words, u16 *data)
+{
+ struct ixgbe_eeprom_info *eeprom = &hw->eeprom;
+ s32 ret_val = IXGBE_ERR_CONFIG;
+
+ /*
+ * If EEPROM is detected and can be addressed using 14 bits,
+ * use EERD otherwise use bit bang
+ */
+ if ((eeprom->type == ixgbe_eeprom_spi) &&
+ (offset + (words - 1) <= IXGBE_EERD_MAX_ADDR))
+ ret_val = ixgbe_read_eerd_buffer_generic(hw, offset, words,
+ data);
+ else
+ ret_val = ixgbe_read_eeprom_buffer_bit_bang_generic(hw, offset,
+ words,
+ data);
+
+ return ret_val;
+}
+
+/**
+ * ixgbe_read_eeprom_82599 - Read EEPROM word using
+ * fastest available method
+ *
+ * @hw: pointer to hardware structure
+ * @offset: offset of word in the EEPROM to read
+ * @data: word read from the EEPROM
+ *
+ * Reads a 16 bit word from the EEPROM
+ **/
+static s32 ixgbe_read_eeprom_82599(struct ixgbe_hw *hw,
+ u16 offset, u16 *data)
+{
+ struct ixgbe_eeprom_info *eeprom = &hw->eeprom;
+ s32 ret_val = IXGBE_ERR_CONFIG;
+
+ /*
+ * If EEPROM is detected and can be addressed using 14 bits,
+ * use EERD otherwise use bit bang
+ */
+ if ((eeprom->type == ixgbe_eeprom_spi) &&
+ (offset <= IXGBE_EERD_MAX_ADDR))
+ ret_val = ixgbe_read_eerd_generic(hw, offset, data);
+ else
+ ret_val = ixgbe_read_eeprom_bit_bang_generic(hw, offset, data);
+
+ return ret_val;
+}
+
+/**
+ * ixgbe_read_i2c_byte_82599 - Reads 8 bit word over I2C
+ * @hw: pointer to hardware structure
+ * @byte_offset: byte offset to read
+ * @data: value read
+ *
+ * Performs byte read operation to SFP module's EEPROM over I2C interface at
+ * a specified device address.
+ **/
+static s32 ixgbe_read_i2c_byte_82599(struct ixgbe_hw *hw, u8 byte_offset,
+ u8 dev_addr, u8 *data)
+{
+ u32 esdp;
+ s32 status;
+ s32 timeout = 200;
+
+ if (hw->phy.qsfp_shared_i2c_bus == TRUE) {
+ /* Acquire I2C bus ownership. */
+ esdp = IXGBE_READ_REG(hw, IXGBE_ESDP);
+ esdp |= IXGBE_ESDP_SDP0;
+ IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp);
+ IXGBE_WRITE_FLUSH(hw);
+
+ while (timeout) {
+ esdp = IXGBE_READ_REG(hw, IXGBE_ESDP);
+ if (esdp & IXGBE_ESDP_SDP1)
+ break;
+
+ msleep(5);
+ timeout--;
+ }
+
+ if (!timeout) {
+ hw_dbg(hw, "Driver can't access resource,"
+ " acquiring I2C bus timeout.\n");
+ status = IXGBE_ERR_I2C;
+ goto release_i2c_access;
+ }
+ }
+
+ status = ixgbe_read_i2c_byte_generic(hw, byte_offset, dev_addr, data);
+
+release_i2c_access:
+
+ if (hw->phy.qsfp_shared_i2c_bus == TRUE) {
+ /* Release I2C bus ownership. */
+ esdp = IXGBE_READ_REG(hw, IXGBE_ESDP);
+ esdp &= ~IXGBE_ESDP_SDP0;
+ IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp);
+ IXGBE_WRITE_FLUSH(hw);
+ }
+
+ return status;
+}
+
+/**
+ * ixgbe_write_i2c_byte_82599 - Writes 8 bit word over I2C
+ * @hw: pointer to hardware structure
+ * @byte_offset: byte offset to write
+ * @data: value to write
+ *
+ * Performs byte write operation to SFP module's EEPROM over I2C interface at
+ * a specified device address.
+ **/
+static s32 ixgbe_write_i2c_byte_82599(struct ixgbe_hw *hw, u8 byte_offset,
+ u8 dev_addr, u8 data)
+{
+ u32 esdp;
+ s32 status;
+ s32 timeout = 200;
+
+ if (hw->phy.qsfp_shared_i2c_bus == TRUE) {
+ /* Acquire I2C bus ownership. */
+ esdp = IXGBE_READ_REG(hw, IXGBE_ESDP);
+ esdp |= IXGBE_ESDP_SDP0;
+ IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp);
+ IXGBE_WRITE_FLUSH(hw);
+
+ while (timeout) {
+ esdp = IXGBE_READ_REG(hw, IXGBE_ESDP);
+ if (esdp & IXGBE_ESDP_SDP1)
+ break;
+
+ msleep(5);
+ timeout--;
+ }
+
+ if (!timeout) {
+ hw_dbg(hw, "Driver can't access resource,"
+ " acquiring I2C bus timeout.\n");
+ status = IXGBE_ERR_I2C;
+ goto release_i2c_access;
+ }
+ }
+
+ status = ixgbe_write_i2c_byte_generic(hw, byte_offset, dev_addr, data);
+
+release_i2c_access:
+
+ if (hw->phy.qsfp_shared_i2c_bus == TRUE) {
+ /* Release I2C bus ownership. */
+ esdp = IXGBE_READ_REG(hw, IXGBE_ESDP);
+ esdp &= ~IXGBE_ESDP_SDP0;
+ IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp);
+ IXGBE_WRITE_FLUSH(hw);
+ }
+
+ return status;
+}
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.h
new file mode 100644
index 00000000..02be92ab
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.h
@@ -0,0 +1,58 @@
+/*******************************************************************************
+
+ Intel 10 Gigabit PCI Express Linux driver
+ Copyright(c) 1999 - 2012 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _IXGBE_82599_H_
+#define _IXGBE_82599_H_
+
+s32 ixgbe_get_link_capabilities_82599(struct ixgbe_hw *hw,
+ ixgbe_link_speed *speed, bool *autoneg);
+enum ixgbe_media_type ixgbe_get_media_type_82599(struct ixgbe_hw *hw);
+void ixgbe_disable_tx_laser_multispeed_fiber(struct ixgbe_hw *hw);
+void ixgbe_enable_tx_laser_multispeed_fiber(struct ixgbe_hw *hw);
+void ixgbe_flap_tx_laser_multispeed_fiber(struct ixgbe_hw *hw);
+s32 ixgbe_setup_mac_link_multispeed_fiber(struct ixgbe_hw *hw,
+ ixgbe_link_speed speed, bool autoneg,
+ bool autoneg_wait_to_complete);
+s32 ixgbe_setup_mac_link_smartspeed(struct ixgbe_hw *hw,
+ ixgbe_link_speed speed, bool autoneg,
+ bool autoneg_wait_to_complete);
+s32 ixgbe_start_mac_link_82599(struct ixgbe_hw *hw,
+ bool autoneg_wait_to_complete);
+s32 ixgbe_setup_mac_link_82599(struct ixgbe_hw *hw, ixgbe_link_speed speed,
+ bool autoneg, bool autoneg_wait_to_complete);
+s32 ixgbe_setup_sfp_modules_82599(struct ixgbe_hw *hw);
+void ixgbe_init_mac_link_ops_82599(struct ixgbe_hw *hw);
+s32 ixgbe_reset_hw_82599(struct ixgbe_hw *hw);
+s32 ixgbe_read_analog_reg8_82599(struct ixgbe_hw *hw, u32 reg, u8 *val);
+s32 ixgbe_write_analog_reg8_82599(struct ixgbe_hw *hw, u32 reg, u8 val);
+s32 ixgbe_start_hw_82599(struct ixgbe_hw *hw);
+s32 ixgbe_identify_phy_82599(struct ixgbe_hw *hw);
+s32 ixgbe_init_phy_ops_82599(struct ixgbe_hw *hw);
+u32 ixgbe_get_supported_physical_layer_82599(struct ixgbe_hw *hw);
+s32 ixgbe_enable_rx_dma_82599(struct ixgbe_hw *hw, u32 regval);
+bool ixgbe_verify_lesm_fw_enabled_82599(struct ixgbe_hw *hw);
+#endif /* _IXGBE_82599_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.c
new file mode 100644
index 00000000..ef7ce629
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.c
@@ -0,0 +1,1157 @@
+/*******************************************************************************
+
+ Intel 10 Gigabit PCI Express Linux driver
+ Copyright(c) 1999 - 2012 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#include "ixgbe_api.h"
+#include "ixgbe_common.h"
+
+/**
+ * ixgbe_init_shared_code - Initialize the shared code
+ * @hw: pointer to hardware structure
+ *
+ * This will assign function pointers and assign the MAC type and PHY code.
+ * Does not touch the hardware. This function must be called prior to any
+ * other function in the shared code. The ixgbe_hw structure should be
+ * memset to 0 prior to calling this function. The following fields in
+ * hw structure should be filled in prior to calling this function:
+ * hw_addr, back, device_id, vendor_id, subsystem_device_id,
+ * subsystem_vendor_id, and revision_id
+ **/
+s32 ixgbe_init_shared_code(struct ixgbe_hw *hw)
+{
+ s32 status;
+
+ /*
+ * Set the mac type
+ */
+ ixgbe_set_mac_type(hw);
+
+ switch (hw->mac.type) {
+ case ixgbe_mac_82598EB:
+ status = ixgbe_init_ops_82598(hw);
+ break;
+ case ixgbe_mac_82599EB:
+ status = ixgbe_init_ops_82599(hw);
+ break;
+ case ixgbe_mac_X540:
+ status = ixgbe_init_ops_X540(hw);
+ break;
+ default:
+ status = IXGBE_ERR_DEVICE_NOT_SUPPORTED;
+ break;
+ }
+
+ return status;
+}
+
+/**
+ * ixgbe_set_mac_type - Sets MAC type
+ * @hw: pointer to the HW structure
+ *
+ * This function sets the mac type of the adapter based on the
+ * vendor ID and device ID stored in the hw structure.
+ **/
+s32 ixgbe_set_mac_type(struct ixgbe_hw *hw)
+{
+ s32 ret_val = 0;
+
+ if (hw->vendor_id == IXGBE_INTEL_VENDOR_ID) {
+ switch (hw->device_id) {
+ case IXGBE_DEV_ID_82598:
+ case IXGBE_DEV_ID_82598_BX:
+ case IXGBE_DEV_ID_82598AF_SINGLE_PORT:
+ case IXGBE_DEV_ID_82598AF_DUAL_PORT:
+ case IXGBE_DEV_ID_82598AT:
+ case IXGBE_DEV_ID_82598AT2:
+ case IXGBE_DEV_ID_82598EB_CX4:
+ case IXGBE_DEV_ID_82598_CX4_DUAL_PORT:
+ case IXGBE_DEV_ID_82598_DA_DUAL_PORT:
+ case IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM:
+ case IXGBE_DEV_ID_82598EB_XF_LR:
+ case IXGBE_DEV_ID_82598EB_SFP_LOM:
+ hw->mac.type = ixgbe_mac_82598EB;
+ break;
+ case IXGBE_DEV_ID_82599_KX4:
+ case IXGBE_DEV_ID_82599_KX4_MEZZ:
+ case IXGBE_DEV_ID_82599_XAUI_LOM:
+ case IXGBE_DEV_ID_82599_COMBO_BACKPLANE:
+ case IXGBE_DEV_ID_82599_KR:
+ case IXGBE_DEV_ID_82599_SFP:
+ case IXGBE_DEV_ID_82599_BACKPLANE_FCOE:
+ case IXGBE_DEV_ID_82599_SFP_FCOE:
+ case IXGBE_DEV_ID_82599_SFP_EM:
+ case IXGBE_DEV_ID_82599_SFP_SF2:
+ case IXGBE_DEV_ID_82599_QSFP_SF_QP:
+ case IXGBE_DEV_ID_82599EN_SFP:
+ case IXGBE_DEV_ID_82599_CX4:
+ case IXGBE_DEV_ID_82599_LS:
+ case IXGBE_DEV_ID_82599_T3_LOM:
+ hw->mac.type = ixgbe_mac_82599EB;
+ break;
+ case IXGBE_DEV_ID_X540T:
+ hw->mac.type = ixgbe_mac_X540;
+ break;
+ default:
+ ret_val = IXGBE_ERR_DEVICE_NOT_SUPPORTED;
+ break;
+ }
+ } else {
+ ret_val = IXGBE_ERR_DEVICE_NOT_SUPPORTED;
+ }
+
+ hw_dbg(hw, "ixgbe_set_mac_type found mac: %d, returns: %d\n",
+ hw->mac.type, ret_val);
+ return ret_val;
+}
+
+/**
+ * ixgbe_init_hw - Initialize the hardware
+ * @hw: pointer to hardware structure
+ *
+ * Initialize the hardware by resetting and then starting the hardware
+ **/
+s32 ixgbe_init_hw(struct ixgbe_hw *hw)
+{
+ return ixgbe_call_func(hw, hw->mac.ops.init_hw, (hw),
+ IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_reset_hw - Performs a hardware reset
+ * @hw: pointer to hardware structure
+ *
+ * Resets the hardware by resetting the transmit and receive units, masks and
+ * clears all interrupts, performs a PHY reset, and performs a MAC reset
+ **/
+s32 ixgbe_reset_hw(struct ixgbe_hw *hw)
+{
+ return ixgbe_call_func(hw, hw->mac.ops.reset_hw, (hw),
+ IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_start_hw - Prepares hardware for Rx/Tx
+ * @hw: pointer to hardware structure
+ *
+ * Starts the hardware by filling the bus info structure and media type,
+ * clears all on chip counters, initializes receive address registers,
+ * multicast table, VLAN filter table, calls routine to setup link and
+ * flow control settings, and leaves transmit and receive units disabled
+ * and uninitialized.
+ **/
+s32 ixgbe_start_hw(struct ixgbe_hw *hw)
+{
+ return ixgbe_call_func(hw, hw->mac.ops.start_hw, (hw),
+ IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_clear_hw_cntrs - Clear hardware counters
+ * @hw: pointer to hardware structure
+ *
+ * Clears all hardware statistics counters by reading them from the hardware
+ * Statistics counters are clear on read.
+ **/
+s32 ixgbe_clear_hw_cntrs(struct ixgbe_hw *hw)
+{
+ return ixgbe_call_func(hw, hw->mac.ops.clear_hw_cntrs, (hw),
+ IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_get_media_type - Get media type
+ * @hw: pointer to hardware structure
+ *
+ * Returns the media type (fiber, copper, backplane)
+ **/
+enum ixgbe_media_type ixgbe_get_media_type(struct ixgbe_hw *hw)
+{
+ return ixgbe_call_func(hw, hw->mac.ops.get_media_type, (hw),
+ ixgbe_media_type_unknown);
+}
+
+/**
+ * ixgbe_get_mac_addr - Get MAC address
+ * @hw: pointer to hardware structure
+ * @mac_addr: Adapter MAC address
+ *
+ * Reads the adapter's MAC address from the first Receive Address Register
+ * (RAR0) A reset of the adapter must have been performed prior to calling
+ * this function in order for the MAC address to have been loaded from the
+ * EEPROM into RAR0
+ **/
+s32 ixgbe_get_mac_addr(struct ixgbe_hw *hw, u8 *mac_addr)
+{
+ return ixgbe_call_func(hw, hw->mac.ops.get_mac_addr,
+ (hw, mac_addr), IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_get_san_mac_addr - Get SAN MAC address
+ * @hw: pointer to hardware structure
+ * @san_mac_addr: SAN MAC address
+ *
+ * Reads the SAN MAC address from the EEPROM, if it's available. This is
+ * per-port, so set_lan_id() must be called before reading the addresses.
+ **/
+s32 ixgbe_get_san_mac_addr(struct ixgbe_hw *hw, u8 *san_mac_addr)
+{
+ return ixgbe_call_func(hw, hw->mac.ops.get_san_mac_addr,
+ (hw, san_mac_addr), IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_set_san_mac_addr - Write a SAN MAC address
+ * @hw: pointer to hardware structure
+ * @san_mac_addr: SAN MAC address
+ *
+ * Writes A SAN MAC address to the EEPROM.
+ **/
+s32 ixgbe_set_san_mac_addr(struct ixgbe_hw *hw, u8 *san_mac_addr)
+{
+ return ixgbe_call_func(hw, hw->mac.ops.set_san_mac_addr,
+ (hw, san_mac_addr), IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_get_device_caps - Get additional device capabilities
+ * @hw: pointer to hardware structure
+ * @device_caps: the EEPROM word for device capabilities
+ *
+ * Reads the extra device capabilities from the EEPROM
+ **/
+s32 ixgbe_get_device_caps(struct ixgbe_hw *hw, u16 *device_caps)
+{
+ return ixgbe_call_func(hw, hw->mac.ops.get_device_caps,
+ (hw, device_caps), IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_get_wwn_prefix - Get alternative WWNN/WWPN prefix from the EEPROM
+ * @hw: pointer to hardware structure
+ * @wwnn_prefix: the alternative WWNN prefix
+ * @wwpn_prefix: the alternative WWPN prefix
+ *
+ * This function will read the EEPROM from the alternative SAN MAC address
+ * block to check the support for the alternative WWNN/WWPN prefix support.
+ **/
+s32 ixgbe_get_wwn_prefix(struct ixgbe_hw *hw, u16 *wwnn_prefix,
+ u16 *wwpn_prefix)
+{
+ return ixgbe_call_func(hw, hw->mac.ops.get_wwn_prefix,
+ (hw, wwnn_prefix, wwpn_prefix),
+ IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_get_fcoe_boot_status - Get FCOE boot status from EEPROM
+ * @hw: pointer to hardware structure
+ * @bs: the fcoe boot status
+ *
+ * This function will read the FCOE boot status from the iSCSI FCOE block
+ **/
+s32 ixgbe_get_fcoe_boot_status(struct ixgbe_hw *hw, u16 *bs)
+{
+ return ixgbe_call_func(hw, hw->mac.ops.get_fcoe_boot_status,
+ (hw, bs),
+ IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_get_bus_info - Set PCI bus info
+ * @hw: pointer to hardware structure
+ *
+ * Sets the PCI bus info (speed, width, type) within the ixgbe_hw structure
+ **/
+s32 ixgbe_get_bus_info(struct ixgbe_hw *hw)
+{
+ return ixgbe_call_func(hw, hw->mac.ops.get_bus_info, (hw),
+ IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_get_num_of_tx_queues - Get Tx queues
+ * @hw: pointer to hardware structure
+ *
+ * Returns the number of transmit queues for the given adapter.
+ **/
+u32 ixgbe_get_num_of_tx_queues(struct ixgbe_hw *hw)
+{
+ return hw->mac.max_tx_queues;
+}
+
+/**
+ * ixgbe_get_num_of_rx_queues - Get Rx queues
+ * @hw: pointer to hardware structure
+ *
+ * Returns the number of receive queues for the given adapter.
+ **/
+u32 ixgbe_get_num_of_rx_queues(struct ixgbe_hw *hw)
+{
+ return hw->mac.max_rx_queues;
+}
+
+/**
+ * ixgbe_stop_adapter - Disable Rx/Tx units
+ * @hw: pointer to hardware structure
+ *
+ * Sets the adapter_stopped flag within ixgbe_hw struct. Clears interrupts,
+ * disables transmit and receive units. The adapter_stopped flag is used by
+ * the shared code and drivers to determine if the adapter is in a stopped
+ * state and should not touch the hardware.
+ **/
+s32 ixgbe_stop_adapter(struct ixgbe_hw *hw)
+{
+ return ixgbe_call_func(hw, hw->mac.ops.stop_adapter, (hw),
+ IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_read_pba_string - Reads part number string from EEPROM
+ * @hw: pointer to hardware structure
+ * @pba_num: stores the part number string from the EEPROM
+ * @pba_num_size: part number string buffer length
+ *
+ * Reads the part number string from the EEPROM.
+ **/
+s32 ixgbe_read_pba_string(struct ixgbe_hw *hw, u8 *pba_num, u32 pba_num_size)
+{
+ return ixgbe_read_pba_string_generic(hw, pba_num, pba_num_size);
+}
+
+/**
+ * ixgbe_identify_phy - Get PHY type
+ * @hw: pointer to hardware structure
+ *
+ * Determines the physical layer module found on the current adapter.
+ **/
+s32 ixgbe_identify_phy(struct ixgbe_hw *hw)
+{
+ s32 status = 0;
+
+ if (hw->phy.type == ixgbe_phy_unknown) {
+ status = ixgbe_call_func(hw, hw->phy.ops.identify, (hw),
+ IXGBE_NOT_IMPLEMENTED);
+ }
+
+ return status;
+}
+
+/**
+ * ixgbe_reset_phy - Perform a PHY reset
+ * @hw: pointer to hardware structure
+ **/
+s32 ixgbe_reset_phy(struct ixgbe_hw *hw)
+{
+ s32 status = 0;
+
+ if (hw->phy.type == ixgbe_phy_unknown) {
+ if (ixgbe_identify_phy(hw) != 0)
+ status = IXGBE_ERR_PHY;
+ }
+
+ if (status == 0) {
+ status = ixgbe_call_func(hw, hw->phy.ops.reset, (hw),
+ IXGBE_NOT_IMPLEMENTED);
+ }
+ return status;
+}
+
+/**
+ * ixgbe_get_phy_firmware_version -
+ * @hw: pointer to hardware structure
+ * @firmware_version: pointer to firmware version
+ **/
+s32 ixgbe_get_phy_firmware_version(struct ixgbe_hw *hw, u16 *firmware_version)
+{
+ s32 status = 0;
+
+ status = ixgbe_call_func(hw, hw->phy.ops.get_firmware_version,
+ (hw, firmware_version),
+ IXGBE_NOT_IMPLEMENTED);
+ return status;
+}
+
+/**
+ * ixgbe_read_phy_reg - Read PHY register
+ * @hw: pointer to hardware structure
+ * @reg_addr: 32 bit address of PHY register to read
+ * @phy_data: Pointer to read data from PHY register
+ *
+ * Reads a value from a specified PHY register
+ **/
+s32 ixgbe_read_phy_reg(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type,
+ u16 *phy_data)
+{
+ if (hw->phy.id == 0)
+ ixgbe_identify_phy(hw);
+
+ return ixgbe_call_func(hw, hw->phy.ops.read_reg, (hw, reg_addr,
+ device_type, phy_data), IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_write_phy_reg - Write PHY register
+ * @hw: pointer to hardware structure
+ * @reg_addr: 32 bit PHY register to write
+ * @phy_data: Data to write to the PHY register
+ *
+ * Writes a value to specified PHY register
+ **/
+s32 ixgbe_write_phy_reg(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type,
+ u16 phy_data)
+{
+ if (hw->phy.id == 0)
+ ixgbe_identify_phy(hw);
+
+ return ixgbe_call_func(hw, hw->phy.ops.write_reg, (hw, reg_addr,
+ device_type, phy_data), IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_setup_phy_link - Restart PHY autoneg
+ * @hw: pointer to hardware structure
+ *
+ * Restart autonegotiation and PHY and waits for completion.
+ **/
+s32 ixgbe_setup_phy_link(struct ixgbe_hw *hw)
+{
+ return ixgbe_call_func(hw, hw->phy.ops.setup_link, (hw),
+ IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_check_phy_link - Determine link and speed status
+ * @hw: pointer to hardware structure
+ *
+ * Reads a PHY register to determine if link is up and the current speed for
+ * the PHY.
+ **/
+s32 ixgbe_check_phy_link(struct ixgbe_hw *hw, ixgbe_link_speed *speed,
+ bool *link_up)
+{
+ return ixgbe_call_func(hw, hw->phy.ops.check_link, (hw, speed,
+ link_up), IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_setup_phy_link_speed - Set auto advertise
+ * @hw: pointer to hardware structure
+ * @speed: new link speed
+ * @autoneg: true if autonegotiation enabled
+ *
+ * Sets the auto advertised capabilities
+ **/
+s32 ixgbe_setup_phy_link_speed(struct ixgbe_hw *hw, ixgbe_link_speed speed,
+ bool autoneg,
+ bool autoneg_wait_to_complete)
+{
+ return ixgbe_call_func(hw, hw->phy.ops.setup_link_speed, (hw, speed,
+ autoneg, autoneg_wait_to_complete),
+ IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_check_link - Get link and speed status
+ * @hw: pointer to hardware structure
+ *
+ * Reads the links register to determine if link is up and the current speed
+ **/
+s32 ixgbe_check_link(struct ixgbe_hw *hw, ixgbe_link_speed *speed,
+ bool *link_up, bool link_up_wait_to_complete)
+{
+ return ixgbe_call_func(hw, hw->mac.ops.check_link, (hw, speed,
+ link_up, link_up_wait_to_complete),
+ IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_disable_tx_laser - Disable Tx laser
+ * @hw: pointer to hardware structure
+ *
+ * If the driver needs to disable the laser on SFI optics.
+ **/
+void ixgbe_disable_tx_laser(struct ixgbe_hw *hw)
+{
+ if (hw->mac.ops.disable_tx_laser)
+ hw->mac.ops.disable_tx_laser(hw);
+}
+
+/**
+ * ixgbe_enable_tx_laser - Enable Tx laser
+ * @hw: pointer to hardware structure
+ *
+ * If the driver needs to enable the laser on SFI optics.
+ **/
+void ixgbe_enable_tx_laser(struct ixgbe_hw *hw)
+{
+ if (hw->mac.ops.enable_tx_laser)
+ hw->mac.ops.enable_tx_laser(hw);
+}
+
+/**
+ * ixgbe_flap_tx_laser - flap Tx laser to start autotry process
+ * @hw: pointer to hardware structure
+ *
+ * When the driver changes the link speeds that it can support then
+ * flap the tx laser to alert the link partner to start autotry
+ * process on its end.
+ **/
+void ixgbe_flap_tx_laser(struct ixgbe_hw *hw)
+{
+ if (hw->mac.ops.flap_tx_laser)
+ hw->mac.ops.flap_tx_laser(hw);
+}
+
+/**
+ * ixgbe_setup_link - Set link speed
+ * @hw: pointer to hardware structure
+ * @speed: new link speed
+ * @autoneg: true if autonegotiation enabled
+ *
+ * Configures link settings. Restarts the link.
+ * Performs autonegotiation if needed.
+ **/
+s32 ixgbe_setup_link(struct ixgbe_hw *hw, ixgbe_link_speed speed,
+ bool autoneg,
+ bool autoneg_wait_to_complete)
+{
+ return ixgbe_call_func(hw, hw->mac.ops.setup_link, (hw, speed,
+ autoneg, autoneg_wait_to_complete),
+ IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_get_link_capabilities - Returns link capabilities
+ * @hw: pointer to hardware structure
+ *
+ * Determines the link capabilities of the current configuration.
+ **/
+s32 ixgbe_get_link_capabilities(struct ixgbe_hw *hw, ixgbe_link_speed *speed,
+ bool *autoneg)
+{
+ return ixgbe_call_func(hw, hw->mac.ops.get_link_capabilities, (hw,
+ speed, autoneg), IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_led_on - Turn on LEDs
+ * @hw: pointer to hardware structure
+ * @index: led number to turn on
+ *
+ * Turns on the software controllable LEDs.
+ **/
+s32 ixgbe_led_on(struct ixgbe_hw *hw, u32 index)
+{
+ return ixgbe_call_func(hw, hw->mac.ops.led_on, (hw, index),
+ IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_led_off - Turn off LEDs
+ * @hw: pointer to hardware structure
+ * @index: led number to turn off
+ *
+ * Turns off the software controllable LEDs.
+ **/
+s32 ixgbe_led_off(struct ixgbe_hw *hw, u32 index)
+{
+ return ixgbe_call_func(hw, hw->mac.ops.led_off, (hw, index),
+ IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_blink_led_start - Blink LEDs
+ * @hw: pointer to hardware structure
+ * @index: led number to blink
+ *
+ * Blink LED based on index.
+ **/
+s32 ixgbe_blink_led_start(struct ixgbe_hw *hw, u32 index)
+{
+ return ixgbe_call_func(hw, hw->mac.ops.blink_led_start, (hw, index),
+ IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_blink_led_stop - Stop blinking LEDs
+ * @hw: pointer to hardware structure
+ *
+ * Stop blinking LED based on index.
+ **/
+s32 ixgbe_blink_led_stop(struct ixgbe_hw *hw, u32 index)
+{
+ return ixgbe_call_func(hw, hw->mac.ops.blink_led_stop, (hw, index),
+ IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_init_eeprom_params - Initialize EEPROM parameters
+ * @hw: pointer to hardware structure
+ *
+ * Initializes the EEPROM parameters ixgbe_eeprom_info within the
+ * ixgbe_hw struct in order to set up EEPROM access.
+ **/
+s32 ixgbe_init_eeprom_params(struct ixgbe_hw *hw)
+{
+ return ixgbe_call_func(hw, hw->eeprom.ops.init_params, (hw),
+ IXGBE_NOT_IMPLEMENTED);
+}
+
+
+/**
+ * ixgbe_write_eeprom - Write word to EEPROM
+ * @hw: pointer to hardware structure
+ * @offset: offset within the EEPROM to be written to
+ * @data: 16 bit word to be written to the EEPROM
+ *
+ * Writes 16 bit value to EEPROM. If ixgbe_eeprom_update_checksum is not
+ * called after this function, the EEPROM will most likely contain an
+ * invalid checksum.
+ **/
+s32 ixgbe_write_eeprom(struct ixgbe_hw *hw, u16 offset, u16 data)
+{
+ return ixgbe_call_func(hw, hw->eeprom.ops.write, (hw, offset, data),
+ IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_write_eeprom_buffer - Write word(s) to EEPROM
+ * @hw: pointer to hardware structure
+ * @offset: offset within the EEPROM to be written to
+ * @data: 16 bit word(s) to be written to the EEPROM
+ * @words: number of words
+ *
+ * Writes 16 bit word(s) to EEPROM. If ixgbe_eeprom_update_checksum is not
+ * called after this function, the EEPROM will most likely contain an
+ * invalid checksum.
+ **/
+s32 ixgbe_write_eeprom_buffer(struct ixgbe_hw *hw, u16 offset, u16 words,
+ u16 *data)
+{
+ return ixgbe_call_func(hw, hw->eeprom.ops.write_buffer,
+ (hw, offset, words, data),
+ IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_read_eeprom - Read word from EEPROM
+ * @hw: pointer to hardware structure
+ * @offset: offset within the EEPROM to be read
+ * @data: read 16 bit value from EEPROM
+ *
+ * Reads 16 bit value from EEPROM
+ **/
+s32 ixgbe_read_eeprom(struct ixgbe_hw *hw, u16 offset, u16 *data)
+{
+ return ixgbe_call_func(hw, hw->eeprom.ops.read, (hw, offset, data),
+ IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_read_eeprom_buffer - Read word(s) from EEPROM
+ * @hw: pointer to hardware structure
+ * @offset: offset within the EEPROM to be read
+ * @data: read 16 bit word(s) from EEPROM
+ * @words: number of words
+ *
+ * Reads 16 bit word(s) from EEPROM
+ **/
+s32 ixgbe_read_eeprom_buffer(struct ixgbe_hw *hw, u16 offset,
+ u16 words, u16 *data)
+{
+ return ixgbe_call_func(hw, hw->eeprom.ops.read_buffer,
+ (hw, offset, words, data),
+ IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_validate_eeprom_checksum - Validate EEPROM checksum
+ * @hw: pointer to hardware structure
+ * @checksum_val: calculated checksum
+ *
+ * Performs checksum calculation and validates the EEPROM checksum
+ **/
+s32 ixgbe_validate_eeprom_checksum(struct ixgbe_hw *hw, u16 *checksum_val)
+{
+ return ixgbe_call_func(hw, hw->eeprom.ops.validate_checksum,
+ (hw, checksum_val), IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_eeprom_update_checksum - Updates the EEPROM checksum
+ * @hw: pointer to hardware structure
+ **/
+s32 ixgbe_update_eeprom_checksum(struct ixgbe_hw *hw)
+{
+ return ixgbe_call_func(hw, hw->eeprom.ops.update_checksum, (hw),
+ IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_insert_mac_addr - Find a RAR for this mac address
+ * @hw: pointer to hardware structure
+ * @addr: Address to put into receive address register
+ * @vmdq: VMDq pool to assign
+ *
+ * Puts an ethernet address into a receive address register, or
+ * finds the rar that it is aleady in; adds to the pool list
+ **/
+s32 ixgbe_insert_mac_addr(struct ixgbe_hw *hw, u8 *addr, u32 vmdq)
+{
+ return ixgbe_call_func(hw, hw->mac.ops.insert_mac_addr,
+ (hw, addr, vmdq),
+ IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_set_rar - Set Rx address register
+ * @hw: pointer to hardware structure
+ * @index: Receive address register to write
+ * @addr: Address to put into receive address register
+ * @vmdq: VMDq "set"
+ * @enable_addr: set flag that address is active
+ *
+ * Puts an ethernet address into a receive address register.
+ **/
+s32 ixgbe_set_rar(struct ixgbe_hw *hw, u32 index, u8 *addr, u32 vmdq,
+ u32 enable_addr)
+{
+ return ixgbe_call_func(hw, hw->mac.ops.set_rar, (hw, index, addr, vmdq,
+ enable_addr), IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_clear_rar - Clear Rx address register
+ * @hw: pointer to hardware structure
+ * @index: Receive address register to write
+ *
+ * Puts an ethernet address into a receive address register.
+ **/
+s32 ixgbe_clear_rar(struct ixgbe_hw *hw, u32 index)
+{
+ return ixgbe_call_func(hw, hw->mac.ops.clear_rar, (hw, index),
+ IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_set_vmdq - Associate a VMDq index with a receive address
+ * @hw: pointer to hardware structure
+ * @rar: receive address register index to associate with VMDq index
+ * @vmdq: VMDq set or pool index
+ **/
+s32 ixgbe_set_vmdq(struct ixgbe_hw *hw, u32 rar, u32 vmdq)
+{
+ return ixgbe_call_func(hw, hw->mac.ops.set_vmdq, (hw, rar, vmdq),
+ IXGBE_NOT_IMPLEMENTED);
+
+}
+
+/**
+ * ixgbe_set_vmdq_san_mac - Associate VMDq index 127 with a receive address
+ * @hw: pointer to hardware structure
+ * @vmdq: VMDq default pool index
+ **/
+s32 ixgbe_set_vmdq_san_mac(struct ixgbe_hw *hw, u32 vmdq)
+{
+ return ixgbe_call_func(hw, hw->mac.ops.set_vmdq_san_mac,
+ (hw, vmdq), IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_clear_vmdq - Disassociate a VMDq index from a receive address
+ * @hw: pointer to hardware structure
+ * @rar: receive address register index to disassociate with VMDq index
+ * @vmdq: VMDq set or pool index
+ **/
+s32 ixgbe_clear_vmdq(struct ixgbe_hw *hw, u32 rar, u32 vmdq)
+{
+ return ixgbe_call_func(hw, hw->mac.ops.clear_vmdq, (hw, rar, vmdq),
+ IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_init_rx_addrs - Initializes receive address filters.
+ * @hw: pointer to hardware structure
+ *
+ * Places the MAC address in receive address register 0 and clears the rest
+ * of the receive address registers. Clears the multicast table. Assumes
+ * the receiver is in reset when the routine is called.
+ **/
+s32 ixgbe_init_rx_addrs(struct ixgbe_hw *hw)
+{
+ return ixgbe_call_func(hw, hw->mac.ops.init_rx_addrs, (hw),
+ IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_get_num_rx_addrs - Returns the number of RAR entries.
+ * @hw: pointer to hardware structure
+ **/
+u32 ixgbe_get_num_rx_addrs(struct ixgbe_hw *hw)
+{
+ return hw->mac.num_rar_entries;
+}
+
+/**
+ * ixgbe_update_uc_addr_list - Updates the MAC's list of secondary addresses
+ * @hw: pointer to hardware structure
+ * @addr_list: the list of new multicast addresses
+ * @addr_count: number of addresses
+ * @func: iterator function to walk the multicast address list
+ *
+ * The given list replaces any existing list. Clears the secondary addrs from
+ * receive address registers. Uses unused receive address registers for the
+ * first secondary addresses, and falls back to promiscuous mode as needed.
+ **/
+s32 ixgbe_update_uc_addr_list(struct ixgbe_hw *hw, u8 *addr_list,
+ u32 addr_count, ixgbe_mc_addr_itr func)
+{
+ return ixgbe_call_func(hw, hw->mac.ops.update_uc_addr_list, (hw,
+ addr_list, addr_count, func),
+ IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_update_mc_addr_list - Updates the MAC's list of multicast addresses
+ * @hw: pointer to hardware structure
+ * @mc_addr_list: the list of new multicast addresses
+ * @mc_addr_count: number of addresses
+ * @func: iterator function to walk the multicast address list
+ *
+ * The given list replaces any existing list. Clears the MC addrs from receive
+ * address registers and the multicast table. Uses unused receive address
+ * registers for the first multicast addresses, and hashes the rest into the
+ * multicast table.
+ **/
+s32 ixgbe_update_mc_addr_list(struct ixgbe_hw *hw, u8 *mc_addr_list,
+ u32 mc_addr_count, ixgbe_mc_addr_itr func,
+ bool clear)
+{
+ return ixgbe_call_func(hw, hw->mac.ops.update_mc_addr_list, (hw,
+ mc_addr_list, mc_addr_count, func, clear),
+ IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_enable_mc - Enable multicast address in RAR
+ * @hw: pointer to hardware structure
+ *
+ * Enables multicast address in RAR and the use of the multicast hash table.
+ **/
+s32 ixgbe_enable_mc(struct ixgbe_hw *hw)
+{
+ return ixgbe_call_func(hw, hw->mac.ops.enable_mc, (hw),
+ IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_disable_mc - Disable multicast address in RAR
+ * @hw: pointer to hardware structure
+ *
+ * Disables multicast address in RAR and the use of the multicast hash table.
+ **/
+s32 ixgbe_disable_mc(struct ixgbe_hw *hw)
+{
+ return ixgbe_call_func(hw, hw->mac.ops.disable_mc, (hw),
+ IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_clear_vfta - Clear VLAN filter table
+ * @hw: pointer to hardware structure
+ *
+ * Clears the VLAN filer table, and the VMDq index associated with the filter
+ **/
+s32 ixgbe_clear_vfta(struct ixgbe_hw *hw)
+{
+ return ixgbe_call_func(hw, hw->mac.ops.clear_vfta, (hw),
+ IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_set_vfta - Set VLAN filter table
+ * @hw: pointer to hardware structure
+ * @vlan: VLAN id to write to VLAN filter
+ * @vind: VMDq output index that maps queue to VLAN id in VFTA
+ * @vlan_on: boolean flag to turn on/off VLAN in VFTA
+ *
+ * Turn on/off specified VLAN in the VLAN filter table.
+ **/
+s32 ixgbe_set_vfta(struct ixgbe_hw *hw, u32 vlan, u32 vind, bool vlan_on)
+{
+ return ixgbe_call_func(hw, hw->mac.ops.set_vfta, (hw, vlan, vind,
+ vlan_on), IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_set_vlvf - Set VLAN Pool Filter
+ * @hw: pointer to hardware structure
+ * @vlan: VLAN id to write to VLAN filter
+ * @vind: VMDq output index that maps queue to VLAN id in VFVFB
+ * @vlan_on: boolean flag to turn on/off VLAN in VFVF
+ * @vfta_changed: pointer to boolean flag which indicates whether VFTA
+ * should be changed
+ *
+ * Turn on/off specified bit in VLVF table.
+ **/
+s32 ixgbe_set_vlvf(struct ixgbe_hw *hw, u32 vlan, u32 vind, bool vlan_on,
+ bool *vfta_changed)
+{
+ return ixgbe_call_func(hw, hw->mac.ops.set_vlvf, (hw, vlan, vind,
+ vlan_on, vfta_changed), IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_fc_enable - Enable flow control
+ * @hw: pointer to hardware structure
+ *
+ * Configures the flow control settings based on SW configuration.
+ **/
+s32 ixgbe_fc_enable(struct ixgbe_hw *hw)
+{
+ return ixgbe_call_func(hw, hw->mac.ops.fc_enable, (hw),
+ IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_set_fw_drv_ver - Try to send the driver version number FW
+ * @hw: pointer to hardware structure
+ * @maj: driver major number to be sent to firmware
+ * @min: driver minor number to be sent to firmware
+ * @build: driver build number to be sent to firmware
+ * @ver: driver version number to be sent to firmware
+ **/
+s32 ixgbe_set_fw_drv_ver(struct ixgbe_hw *hw, u8 maj, u8 min, u8 build,
+ u8 ver)
+{
+ return ixgbe_call_func(hw, hw->mac.ops.set_fw_drv_ver, (hw, maj, min,
+ build, ver), IXGBE_NOT_IMPLEMENTED);
+}
+
+
+/**
+ * ixgbe_get_thermal_sensor_data - Gathers thermal sensor data
+ * @hw: pointer to hardware structure
+ *
+ * Updates the temperatures in mac.thermal_sensor_data
+ **/
+s32 ixgbe_get_thermal_sensor_data(struct ixgbe_hw *hw)
+{
+ return ixgbe_call_func(hw, hw->mac.ops.get_thermal_sensor_data, (hw),
+ IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_init_thermal_sensor_thresh - Inits thermal sensor thresholds
+ * @hw: pointer to hardware structure
+ *
+ * Inits the thermal sensor thresholds according to the NVM map
+ **/
+s32 ixgbe_init_thermal_sensor_thresh(struct ixgbe_hw *hw)
+{
+ return ixgbe_call_func(hw, hw->mac.ops.init_thermal_sensor_thresh, (hw),
+ IXGBE_NOT_IMPLEMENTED);
+}
+/**
+ * ixgbe_read_analog_reg8 - Reads 8 bit analog register
+ * @hw: pointer to hardware structure
+ * @reg: analog register to read
+ * @val: read value
+ *
+ * Performs write operation to analog register specified.
+ **/
+s32 ixgbe_read_analog_reg8(struct ixgbe_hw *hw, u32 reg, u8 *val)
+{
+ return ixgbe_call_func(hw, hw->mac.ops.read_analog_reg8, (hw, reg,
+ val), IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_write_analog_reg8 - Writes 8 bit analog register
+ * @hw: pointer to hardware structure
+ * @reg: analog register to write
+ * @val: value to write
+ *
+ * Performs write operation to Atlas analog register specified.
+ **/
+s32 ixgbe_write_analog_reg8(struct ixgbe_hw *hw, u32 reg, u8 val)
+{
+ return ixgbe_call_func(hw, hw->mac.ops.write_analog_reg8, (hw, reg,
+ val), IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_init_uta_tables - Initializes Unicast Table Arrays.
+ * @hw: pointer to hardware structure
+ *
+ * Initializes the Unicast Table Arrays to zero on device load. This
+ * is part of the Rx init addr execution path.
+ **/
+s32 ixgbe_init_uta_tables(struct ixgbe_hw *hw)
+{
+ return ixgbe_call_func(hw, hw->mac.ops.init_uta_tables, (hw),
+ IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_read_i2c_byte - Reads 8 bit word over I2C at specified device address
+ * @hw: pointer to hardware structure
+ * @byte_offset: byte offset to read
+ * @data: value read
+ *
+ * Performs byte read operation to SFP module's EEPROM over I2C interface.
+ **/
+s32 ixgbe_read_i2c_byte(struct ixgbe_hw *hw, u8 byte_offset, u8 dev_addr,
+ u8 *data)
+{
+ return ixgbe_call_func(hw, hw->phy.ops.read_i2c_byte, (hw, byte_offset,
+ dev_addr, data), IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_write_i2c_byte - Writes 8 bit word over I2C
+ * @hw: pointer to hardware structure
+ * @byte_offset: byte offset to write
+ * @data: value to write
+ *
+ * Performs byte write operation to SFP module's EEPROM over I2C interface
+ * at a specified device address.
+ **/
+s32 ixgbe_write_i2c_byte(struct ixgbe_hw *hw, u8 byte_offset, u8 dev_addr,
+ u8 data)
+{
+ return ixgbe_call_func(hw, hw->phy.ops.write_i2c_byte, (hw, byte_offset,
+ dev_addr, data), IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_write_i2c_eeprom - Writes 8 bit EEPROM word over I2C interface
+ * @hw: pointer to hardware structure
+ * @byte_offset: EEPROM byte offset to write
+ * @eeprom_data: value to write
+ *
+ * Performs byte write operation to SFP module's EEPROM over I2C interface.
+ **/
+s32 ixgbe_write_i2c_eeprom(struct ixgbe_hw *hw,
+ u8 byte_offset, u8 eeprom_data)
+{
+ return ixgbe_call_func(hw, hw->phy.ops.write_i2c_eeprom,
+ (hw, byte_offset, eeprom_data),
+ IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_read_i2c_eeprom - Reads 8 bit EEPROM word over I2C interface
+ * @hw: pointer to hardware structure
+ * @byte_offset: EEPROM byte offset to read
+ * @eeprom_data: value read
+ *
+ * Performs byte read operation to SFP module's EEPROM over I2C interface.
+ **/
+s32 ixgbe_read_i2c_eeprom(struct ixgbe_hw *hw, u8 byte_offset, u8 *eeprom_data)
+{
+ return ixgbe_call_func(hw, hw->phy.ops.read_i2c_eeprom,
+ (hw, byte_offset, eeprom_data),
+ IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_get_supported_physical_layer - Returns physical layer type
+ * @hw: pointer to hardware structure
+ *
+ * Determines physical layer capabilities of the current configuration.
+ **/
+u32 ixgbe_get_supported_physical_layer(struct ixgbe_hw *hw)
+{
+ return ixgbe_call_func(hw, hw->mac.ops.get_supported_physical_layer,
+ (hw), IXGBE_PHYSICAL_LAYER_UNKNOWN);
+}
+
+/**
+ * ixgbe_enable_rx_dma - Enables Rx DMA unit, dependent on device specifics
+ * @hw: pointer to hardware structure
+ * @regval: bitfield to write to the Rx DMA register
+ *
+ * Enables the Rx DMA unit of the device.
+ **/
+s32 ixgbe_enable_rx_dma(struct ixgbe_hw *hw, u32 regval)
+{
+ return ixgbe_call_func(hw, hw->mac.ops.enable_rx_dma,
+ (hw, regval), IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_disable_sec_rx_path - Stops the receive data path
+ * @hw: pointer to hardware structure
+ *
+ * Stops the receive data path.
+ **/
+s32 ixgbe_disable_sec_rx_path(struct ixgbe_hw *hw)
+{
+ return ixgbe_call_func(hw, hw->mac.ops.disable_sec_rx_path,
+ (hw), IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_enable_sec_rx_path - Enables the receive data path
+ * @hw: pointer to hardware structure
+ *
+ * Enables the receive data path.
+ **/
+s32 ixgbe_enable_sec_rx_path(struct ixgbe_hw *hw)
+{
+ return ixgbe_call_func(hw, hw->mac.ops.enable_sec_rx_path,
+ (hw), IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_acquire_swfw_semaphore - Acquire SWFW semaphore
+ * @hw: pointer to hardware structure
+ * @mask: Mask to specify which semaphore to acquire
+ *
+ * Acquires the SWFW semaphore through SW_FW_SYNC register for the specified
+ * function (CSR, PHY0, PHY1, EEPROM, Flash)
+ **/
+s32 ixgbe_acquire_swfw_semaphore(struct ixgbe_hw *hw, u16 mask)
+{
+ return ixgbe_call_func(hw, hw->mac.ops.acquire_swfw_sync,
+ (hw, mask), IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_release_swfw_semaphore - Release SWFW semaphore
+ * @hw: pointer to hardware structure
+ * @mask: Mask to specify which semaphore to release
+ *
+ * Releases the SWFW semaphore through SW_FW_SYNC register for the specified
+ * function (CSR, PHY0, PHY1, EEPROM, Flash)
+ **/
+void ixgbe_release_swfw_semaphore(struct ixgbe_hw *hw, u16 mask)
+{
+ if (hw->mac.ops.release_swfw_sync)
+ hw->mac.ops.release_swfw_sync(hw, mask);
+}
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.h
new file mode 100644
index 00000000..a6ab30d2
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.h
@@ -0,0 +1,168 @@
+/*******************************************************************************
+
+ Intel 10 Gigabit PCI Express Linux driver
+ Copyright(c) 1999 - 2012 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _IXGBE_API_H_
+#define _IXGBE_API_H_
+
+#include "ixgbe_type.h"
+
+s32 ixgbe_init_shared_code(struct ixgbe_hw *hw);
+
+extern s32 ixgbe_init_ops_82598(struct ixgbe_hw *hw);
+extern s32 ixgbe_init_ops_82599(struct ixgbe_hw *hw);
+extern s32 ixgbe_init_ops_X540(struct ixgbe_hw *hw);
+
+s32 ixgbe_set_mac_type(struct ixgbe_hw *hw);
+s32 ixgbe_init_hw(struct ixgbe_hw *hw);
+s32 ixgbe_reset_hw(struct ixgbe_hw *hw);
+s32 ixgbe_start_hw(struct ixgbe_hw *hw);
+s32 ixgbe_clear_hw_cntrs(struct ixgbe_hw *hw);
+enum ixgbe_media_type ixgbe_get_media_type(struct ixgbe_hw *hw);
+s32 ixgbe_get_mac_addr(struct ixgbe_hw *hw, u8 *mac_addr);
+s32 ixgbe_get_bus_info(struct ixgbe_hw *hw);
+u32 ixgbe_get_num_of_tx_queues(struct ixgbe_hw *hw);
+u32 ixgbe_get_num_of_rx_queues(struct ixgbe_hw *hw);
+s32 ixgbe_stop_adapter(struct ixgbe_hw *hw);
+s32 ixgbe_read_pba_string(struct ixgbe_hw *hw, u8 *pba_num, u32 pba_num_size);
+
+s32 ixgbe_identify_phy(struct ixgbe_hw *hw);
+s32 ixgbe_reset_phy(struct ixgbe_hw *hw);
+s32 ixgbe_read_phy_reg(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type,
+ u16 *phy_data);
+s32 ixgbe_write_phy_reg(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type,
+ u16 phy_data);
+
+s32 ixgbe_setup_phy_link(struct ixgbe_hw *hw);
+s32 ixgbe_check_phy_link(struct ixgbe_hw *hw,
+ ixgbe_link_speed *speed,
+ bool *link_up);
+s32 ixgbe_setup_phy_link_speed(struct ixgbe_hw *hw,
+ ixgbe_link_speed speed,
+ bool autoneg,
+ bool autoneg_wait_to_complete);
+void ixgbe_disable_tx_laser(struct ixgbe_hw *hw);
+void ixgbe_enable_tx_laser(struct ixgbe_hw *hw);
+void ixgbe_flap_tx_laser(struct ixgbe_hw *hw);
+s32 ixgbe_setup_link(struct ixgbe_hw *hw, ixgbe_link_speed speed,
+ bool autoneg, bool autoneg_wait_to_complete);
+s32 ixgbe_check_link(struct ixgbe_hw *hw, ixgbe_link_speed *speed,
+ bool *link_up, bool link_up_wait_to_complete);
+s32 ixgbe_get_link_capabilities(struct ixgbe_hw *hw, ixgbe_link_speed *speed,
+ bool *autoneg);
+s32 ixgbe_led_on(struct ixgbe_hw *hw, u32 index);
+s32 ixgbe_led_off(struct ixgbe_hw *hw, u32 index);
+s32 ixgbe_blink_led_start(struct ixgbe_hw *hw, u32 index);
+s32 ixgbe_blink_led_stop(struct ixgbe_hw *hw, u32 index);
+
+s32 ixgbe_init_eeprom_params(struct ixgbe_hw *hw);
+s32 ixgbe_write_eeprom(struct ixgbe_hw *hw, u16 offset, u16 data);
+s32 ixgbe_write_eeprom_buffer(struct ixgbe_hw *hw, u16 offset,
+ u16 words, u16 *data);
+s32 ixgbe_read_eeprom(struct ixgbe_hw *hw, u16 offset, u16 *data);
+s32 ixgbe_read_eeprom_buffer(struct ixgbe_hw *hw, u16 offset,
+ u16 words, u16 *data);
+
+s32 ixgbe_validate_eeprom_checksum(struct ixgbe_hw *hw, u16 *checksum_val);
+s32 ixgbe_update_eeprom_checksum(struct ixgbe_hw *hw);
+
+s32 ixgbe_insert_mac_addr(struct ixgbe_hw *hw, u8 *addr, u32 vmdq);
+s32 ixgbe_set_rar(struct ixgbe_hw *hw, u32 index, u8 *addr, u32 vmdq,
+ u32 enable_addr);
+s32 ixgbe_clear_rar(struct ixgbe_hw *hw, u32 index);
+s32 ixgbe_set_vmdq(struct ixgbe_hw *hw, u32 rar, u32 vmdq);
+s32 ixgbe_set_vmdq_san_mac(struct ixgbe_hw *hw, u32 vmdq);
+s32 ixgbe_clear_vmdq(struct ixgbe_hw *hw, u32 rar, u32 vmdq);
+s32 ixgbe_init_rx_addrs(struct ixgbe_hw *hw);
+u32 ixgbe_get_num_rx_addrs(struct ixgbe_hw *hw);
+s32 ixgbe_update_uc_addr_list(struct ixgbe_hw *hw, u8 *addr_list,
+ u32 addr_count, ixgbe_mc_addr_itr func);
+s32 ixgbe_update_mc_addr_list(struct ixgbe_hw *hw, u8 *mc_addr_list,
+ u32 mc_addr_count, ixgbe_mc_addr_itr func,
+ bool clear);
+void ixgbe_add_uc_addr(struct ixgbe_hw *hw, u8 *addr_list, u32 vmdq);
+s32 ixgbe_enable_mc(struct ixgbe_hw *hw);
+s32 ixgbe_disable_mc(struct ixgbe_hw *hw);
+s32 ixgbe_clear_vfta(struct ixgbe_hw *hw);
+s32 ixgbe_set_vfta(struct ixgbe_hw *hw, u32 vlan,
+ u32 vind, bool vlan_on);
+s32 ixgbe_set_vlvf(struct ixgbe_hw *hw, u32 vlan, u32 vind,
+ bool vlan_on, bool *vfta_changed);
+s32 ixgbe_fc_enable(struct ixgbe_hw *hw);
+s32 ixgbe_set_fw_drv_ver(struct ixgbe_hw *hw, u8 maj, u8 min, u8 build,
+ u8 ver);
+s32 ixgbe_get_thermal_sensor_data(struct ixgbe_hw *hw);
+s32 ixgbe_init_thermal_sensor_thresh(struct ixgbe_hw *hw);
+void ixgbe_set_mta(struct ixgbe_hw *hw, u8 *mc_addr);
+s32 ixgbe_get_phy_firmware_version(struct ixgbe_hw *hw,
+ u16 *firmware_version);
+s32 ixgbe_read_analog_reg8(struct ixgbe_hw *hw, u32 reg, u8 *val);
+s32 ixgbe_write_analog_reg8(struct ixgbe_hw *hw, u32 reg, u8 val);
+s32 ixgbe_init_uta_tables(struct ixgbe_hw *hw);
+s32 ixgbe_read_i2c_eeprom(struct ixgbe_hw *hw, u8 byte_offset, u8 *eeprom_data);
+u32 ixgbe_get_supported_physical_layer(struct ixgbe_hw *hw);
+s32 ixgbe_enable_rx_dma(struct ixgbe_hw *hw, u32 regval);
+s32 ixgbe_disable_sec_rx_path(struct ixgbe_hw *hw);
+s32 ixgbe_enable_sec_rx_path(struct ixgbe_hw *hw);
+s32 ixgbe_reinit_fdir_tables_82599(struct ixgbe_hw *hw);
+s32 ixgbe_init_fdir_signature_82599(struct ixgbe_hw *hw, u32 fdirctrl);
+s32 ixgbe_init_fdir_perfect_82599(struct ixgbe_hw *hw, u32 fdirctrl);
+s32 ixgbe_fdir_add_signature_filter_82599(struct ixgbe_hw *hw,
+ union ixgbe_atr_hash_dword input,
+ union ixgbe_atr_hash_dword common,
+ u8 queue);
+s32 ixgbe_fdir_set_input_mask_82599(struct ixgbe_hw *hw,
+ union ixgbe_atr_input *input_mask);
+s32 ixgbe_fdir_write_perfect_filter_82599(struct ixgbe_hw *hw,
+ union ixgbe_atr_input *input,
+ u16 soft_id, u8 queue);
+s32 ixgbe_fdir_erase_perfect_filter_82599(struct ixgbe_hw *hw,
+ union ixgbe_atr_input *input,
+ u16 soft_id);
+s32 ixgbe_fdir_add_perfect_filter_82599(struct ixgbe_hw *hw,
+ union ixgbe_atr_input *input,
+ union ixgbe_atr_input *mask,
+ u16 soft_id,
+ u8 queue);
+void ixgbe_atr_compute_perfect_hash_82599(union ixgbe_atr_input *input,
+ union ixgbe_atr_input *mask);
+u32 ixgbe_atr_compute_sig_hash_82599(union ixgbe_atr_hash_dword input,
+ union ixgbe_atr_hash_dword common);
+s32 ixgbe_read_i2c_byte(struct ixgbe_hw *hw, u8 byte_offset, u8 dev_addr,
+ u8 *data);
+s32 ixgbe_write_i2c_byte(struct ixgbe_hw *hw, u8 byte_offset, u8 dev_addr,
+ u8 data);
+s32 ixgbe_write_i2c_eeprom(struct ixgbe_hw *hw, u8 byte_offset, u8 eeprom_data);
+s32 ixgbe_get_san_mac_addr(struct ixgbe_hw *hw, u8 *san_mac_addr);
+s32 ixgbe_set_san_mac_addr(struct ixgbe_hw *hw, u8 *san_mac_addr);
+s32 ixgbe_get_device_caps(struct ixgbe_hw *hw, u16 *device_caps);
+s32 ixgbe_acquire_swfw_semaphore(struct ixgbe_hw *hw, u16 mask);
+void ixgbe_release_swfw_semaphore(struct ixgbe_hw *hw, u16 mask);
+s32 ixgbe_get_wwn_prefix(struct ixgbe_hw *hw, u16 *wwnn_prefix,
+ u16 *wwpn_prefix);
+s32 ixgbe_get_fcoe_boot_status(struct ixgbe_hw *hw, u16 *bs);
+
+#endif /* _IXGBE_API_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.c
new file mode 100644
index 00000000..93659ca0
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.c
@@ -0,0 +1,4082 @@
+/*******************************************************************************
+
+ Intel 10 Gigabit PCI Express Linux driver
+ Copyright(c) 1999 - 2012 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#include "ixgbe_common.h"
+#include "ixgbe_phy.h"
+#include "ixgbe_api.h"
+
+static s32 ixgbe_acquire_eeprom(struct ixgbe_hw *hw);
+static s32 ixgbe_get_eeprom_semaphore(struct ixgbe_hw *hw);
+static void ixgbe_release_eeprom_semaphore(struct ixgbe_hw *hw);
+static s32 ixgbe_ready_eeprom(struct ixgbe_hw *hw);
+static void ixgbe_standby_eeprom(struct ixgbe_hw *hw);
+static void ixgbe_shift_out_eeprom_bits(struct ixgbe_hw *hw, u16 data,
+ u16 count);
+static u16 ixgbe_shift_in_eeprom_bits(struct ixgbe_hw *hw, u16 count);
+static void ixgbe_raise_eeprom_clk(struct ixgbe_hw *hw, u32 *eec);
+static void ixgbe_lower_eeprom_clk(struct ixgbe_hw *hw, u32 *eec);
+static void ixgbe_release_eeprom(struct ixgbe_hw *hw);
+
+static s32 ixgbe_mta_vector(struct ixgbe_hw *hw, u8 *mc_addr);
+static s32 ixgbe_get_san_mac_addr_offset(struct ixgbe_hw *hw,
+ u16 *san_mac_offset);
+static s32 ixgbe_read_eeprom_buffer_bit_bang(struct ixgbe_hw *hw, u16 offset,
+ u16 words, u16 *data);
+static s32 ixgbe_write_eeprom_buffer_bit_bang(struct ixgbe_hw *hw, u16 offset,
+ u16 words, u16 *data);
+static s32 ixgbe_detect_eeprom_page_size_generic(struct ixgbe_hw *hw,
+ u16 offset);
+
+/**
+ * ixgbe_init_ops_generic - Inits function ptrs
+ * @hw: pointer to the hardware structure
+ *
+ * Initialize the function pointers.
+ **/
+s32 ixgbe_init_ops_generic(struct ixgbe_hw *hw)
+{
+ struct ixgbe_eeprom_info *eeprom = &hw->eeprom;
+ struct ixgbe_mac_info *mac = &hw->mac;
+ u32 eec = IXGBE_READ_REG(hw, IXGBE_EEC);
+
+ /* EEPROM */
+ eeprom->ops.init_params = &ixgbe_init_eeprom_params_generic;
+ /* If EEPROM is valid (bit 8 = 1), use EERD otherwise use bit bang */
+ if (eec & IXGBE_EEC_PRES) {
+ eeprom->ops.read = &ixgbe_read_eerd_generic;
+ eeprom->ops.read_buffer = &ixgbe_read_eerd_buffer_generic;
+ } else {
+ eeprom->ops.read = &ixgbe_read_eeprom_bit_bang_generic;
+ eeprom->ops.read_buffer =
+ &ixgbe_read_eeprom_buffer_bit_bang_generic;
+ }
+ eeprom->ops.write = &ixgbe_write_eeprom_generic;
+ eeprom->ops.write_buffer = &ixgbe_write_eeprom_buffer_bit_bang_generic;
+ eeprom->ops.validate_checksum =
+ &ixgbe_validate_eeprom_checksum_generic;
+ eeprom->ops.update_checksum = &ixgbe_update_eeprom_checksum_generic;
+ eeprom->ops.calc_checksum = &ixgbe_calc_eeprom_checksum_generic;
+
+ /* MAC */
+ mac->ops.init_hw = &ixgbe_init_hw_generic;
+ mac->ops.reset_hw = NULL;
+ mac->ops.start_hw = &ixgbe_start_hw_generic;
+ mac->ops.clear_hw_cntrs = &ixgbe_clear_hw_cntrs_generic;
+ mac->ops.get_media_type = NULL;
+ mac->ops.get_supported_physical_layer = NULL;
+ mac->ops.enable_rx_dma = &ixgbe_enable_rx_dma_generic;
+ mac->ops.get_mac_addr = &ixgbe_get_mac_addr_generic;
+ mac->ops.stop_adapter = &ixgbe_stop_adapter_generic;
+ mac->ops.get_bus_info = &ixgbe_get_bus_info_generic;
+ mac->ops.set_lan_id = &ixgbe_set_lan_id_multi_port_pcie;
+ mac->ops.acquire_swfw_sync = &ixgbe_acquire_swfw_sync;
+ mac->ops.release_swfw_sync = &ixgbe_release_swfw_sync;
+
+ /* LEDs */
+ mac->ops.led_on = &ixgbe_led_on_generic;
+ mac->ops.led_off = &ixgbe_led_off_generic;
+ mac->ops.blink_led_start = &ixgbe_blink_led_start_generic;
+ mac->ops.blink_led_stop = &ixgbe_blink_led_stop_generic;
+
+ /* RAR, Multicast, VLAN */
+ mac->ops.set_rar = &ixgbe_set_rar_generic;
+ mac->ops.clear_rar = &ixgbe_clear_rar_generic;
+ mac->ops.insert_mac_addr = NULL;
+ mac->ops.set_vmdq = NULL;
+ mac->ops.clear_vmdq = NULL;
+ mac->ops.init_rx_addrs = &ixgbe_init_rx_addrs_generic;
+ mac->ops.update_uc_addr_list = &ixgbe_update_uc_addr_list_generic;
+ mac->ops.update_mc_addr_list = &ixgbe_update_mc_addr_list_generic;
+ mac->ops.enable_mc = &ixgbe_enable_mc_generic;
+ mac->ops.disable_mc = &ixgbe_disable_mc_generic;
+ mac->ops.clear_vfta = NULL;
+ mac->ops.set_vfta = NULL;
+ mac->ops.set_vlvf = NULL;
+ mac->ops.init_uta_tables = NULL;
+
+ /* Flow Control */
+ mac->ops.fc_enable = &ixgbe_fc_enable_generic;
+
+ /* Link */
+ mac->ops.get_link_capabilities = NULL;
+ mac->ops.setup_link = NULL;
+ mac->ops.check_link = NULL;
+
+ return 0;
+}
+
+/**
+ * ixgbe_device_supports_autoneg_fc - Check if phy supports autoneg flow
+ * control
+ * @hw: pointer to hardware structure
+ *
+ * There are several phys that do not support autoneg flow control. This
+ * function check the device id to see if the associated phy supports
+ * autoneg flow control.
+ **/
+static s32 ixgbe_device_supports_autoneg_fc(struct ixgbe_hw *hw)
+{
+
+ switch (hw->device_id) {
+ case IXGBE_DEV_ID_X540T:
+ return 0;
+ case IXGBE_DEV_ID_82599_T3_LOM:
+ return 0;
+ default:
+ return IXGBE_ERR_FC_NOT_SUPPORTED;
+ }
+}
+
+/**
+ * ixgbe_setup_fc - Set up flow control
+ * @hw: pointer to hardware structure
+ *
+ * Called at init time to set up flow control.
+ **/
+static s32 ixgbe_setup_fc(struct ixgbe_hw *hw)
+{
+ s32 ret_val = 0;
+ u32 reg = 0, reg_bp = 0;
+ u16 reg_cu = 0;
+
+ /*
+ * Validate the requested mode. Strict IEEE mode does not allow
+ * ixgbe_fc_rx_pause because it will cause us to fail at UNH.
+ */
+ if (hw->fc.strict_ieee && hw->fc.requested_mode == ixgbe_fc_rx_pause) {
+ hw_dbg(hw, "ixgbe_fc_rx_pause not valid in strict IEEE mode\n");
+ ret_val = IXGBE_ERR_INVALID_LINK_SETTINGS;
+ goto out;
+ }
+
+ /*
+ * 10gig parts do not have a word in the EEPROM to determine the
+ * default flow control setting, so we explicitly set it to full.
+ */
+ if (hw->fc.requested_mode == ixgbe_fc_default)
+ hw->fc.requested_mode = ixgbe_fc_full;
+
+ /*
+ * Set up the 1G and 10G flow control advertisement registers so the
+ * HW will be able to do fc autoneg once the cable is plugged in. If
+ * we link at 10G, the 1G advertisement is harmless and vice versa.
+ */
+ switch (hw->phy.media_type) {
+ case ixgbe_media_type_fiber:
+ case ixgbe_media_type_backplane:
+ reg = IXGBE_READ_REG(hw, IXGBE_PCS1GANA);
+ reg_bp = IXGBE_READ_REG(hw, IXGBE_AUTOC);
+ break;
+ case ixgbe_media_type_copper:
+ hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_ADVT,
+ IXGBE_MDIO_AUTO_NEG_DEV_TYPE, &reg_cu);
+ break;
+ default:
+ break;
+ }
+
+ /*
+ * The possible values of fc.requested_mode are:
+ * 0: Flow control is completely disabled
+ * 1: Rx flow control is enabled (we can receive pause frames,
+ * but not send pause frames).
+ * 2: Tx flow control is enabled (we can send pause frames but
+ * we do not support receiving pause frames).
+ * 3: Both Rx and Tx flow control (symmetric) are enabled.
+ * other: Invalid.
+ */
+ switch (hw->fc.requested_mode) {
+ case ixgbe_fc_none:
+ /* Flow control completely disabled by software override. */
+ reg &= ~(IXGBE_PCS1GANA_SYM_PAUSE | IXGBE_PCS1GANA_ASM_PAUSE);
+ if (hw->phy.media_type == ixgbe_media_type_backplane)
+ reg_bp &= ~(IXGBE_AUTOC_SYM_PAUSE |
+ IXGBE_AUTOC_ASM_PAUSE);
+ else if (hw->phy.media_type == ixgbe_media_type_copper)
+ reg_cu &= ~(IXGBE_TAF_SYM_PAUSE | IXGBE_TAF_ASM_PAUSE);
+ break;
+ case ixgbe_fc_tx_pause:
+ /*
+ * Tx Flow control is enabled, and Rx Flow control is
+ * disabled by software override.
+ */
+ reg |= IXGBE_PCS1GANA_ASM_PAUSE;
+ reg &= ~IXGBE_PCS1GANA_SYM_PAUSE;
+ if (hw->phy.media_type == ixgbe_media_type_backplane) {
+ reg_bp |= IXGBE_AUTOC_ASM_PAUSE;
+ reg_bp &= ~IXGBE_AUTOC_SYM_PAUSE;
+ } else if (hw->phy.media_type == ixgbe_media_type_copper) {
+ reg_cu |= IXGBE_TAF_ASM_PAUSE;
+ reg_cu &= ~IXGBE_TAF_SYM_PAUSE;
+ }
+ break;
+ case ixgbe_fc_rx_pause:
+ /*
+ * Rx Flow control is enabled and Tx Flow control is
+ * disabled by software override. Since there really
+ * isn't a way to advertise that we are capable of RX
+ * Pause ONLY, we will advertise that we support both
+ * symmetric and asymmetric Rx PAUSE, as such we fall
+ * through to the fc_full statement. Later, we will
+ * disable the adapter's ability to send PAUSE frames.
+ */
+ case ixgbe_fc_full:
+ /* Flow control (both Rx and Tx) is enabled by SW override. */
+ reg |= IXGBE_PCS1GANA_SYM_PAUSE | IXGBE_PCS1GANA_ASM_PAUSE;
+ if (hw->phy.media_type == ixgbe_media_type_backplane)
+ reg_bp |= IXGBE_AUTOC_SYM_PAUSE |
+ IXGBE_AUTOC_ASM_PAUSE;
+ else if (hw->phy.media_type == ixgbe_media_type_copper)
+ reg_cu |= IXGBE_TAF_SYM_PAUSE | IXGBE_TAF_ASM_PAUSE;
+ break;
+ default:
+ hw_dbg(hw, "Flow control param set incorrectly\n");
+ ret_val = IXGBE_ERR_CONFIG;
+ goto out;
+ break;
+ }
+
+ if (hw->mac.type != ixgbe_mac_X540) {
+ /*
+ * Enable auto-negotiation between the MAC & PHY;
+ * the MAC will advertise clause 37 flow control.
+ */
+ IXGBE_WRITE_REG(hw, IXGBE_PCS1GANA, reg);
+ reg = IXGBE_READ_REG(hw, IXGBE_PCS1GLCTL);
+
+ /* Disable AN timeout */
+ if (hw->fc.strict_ieee)
+ reg &= ~IXGBE_PCS1GLCTL_AN_1G_TIMEOUT_EN;
+
+ IXGBE_WRITE_REG(hw, IXGBE_PCS1GLCTL, reg);
+ hw_dbg(hw, "Set up FC; PCS1GLCTL = 0x%08X\n", reg);
+ }
+
+ /*
+ * AUTOC restart handles negotiation of 1G and 10G on backplane
+ * and copper. There is no need to set the PCS1GCTL register.
+ *
+ */
+ if (hw->phy.media_type == ixgbe_media_type_backplane) {
+ reg_bp |= IXGBE_AUTOC_AN_RESTART;
+ IXGBE_WRITE_REG(hw, IXGBE_AUTOC, reg_bp);
+ } else if ((hw->phy.media_type == ixgbe_media_type_copper) &&
+ (ixgbe_device_supports_autoneg_fc(hw) == 0)) {
+ hw->phy.ops.write_reg(hw, IXGBE_MDIO_AUTO_NEG_ADVT,
+ IXGBE_MDIO_AUTO_NEG_DEV_TYPE, reg_cu);
+ }
+
+ hw_dbg(hw, "Set up FC; IXGBE_AUTOC = 0x%08X\n", reg);
+out:
+ return ret_val;
+}
+
+/**
+ * ixgbe_start_hw_generic - Prepare hardware for Tx/Rx
+ * @hw: pointer to hardware structure
+ *
+ * Starts the hardware by filling the bus info structure and media type, clears
+ * all on chip counters, initializes receive address registers, multicast
+ * table, VLAN filter table, calls routine to set up link and flow control
+ * settings, and leaves transmit and receive units disabled and uninitialized
+ **/
+s32 ixgbe_start_hw_generic(struct ixgbe_hw *hw)
+{
+ s32 ret_val;
+ u32 ctrl_ext;
+
+ /* Set the media type */
+ hw->phy.media_type = hw->mac.ops.get_media_type(hw);
+
+ /* PHY ops initialization must be done in reset_hw() */
+
+ /* Clear the VLAN filter table */
+ hw->mac.ops.clear_vfta(hw);
+
+ /* Clear statistics registers */
+ hw->mac.ops.clear_hw_cntrs(hw);
+
+ /* Set No Snoop Disable */
+ ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
+ ctrl_ext |= IXGBE_CTRL_EXT_NS_DIS;
+ IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
+ IXGBE_WRITE_FLUSH(hw);
+
+ /* Setup flow control */
+ ret_val = ixgbe_setup_fc(hw);
+ if (ret_val != 0)
+ goto out;
+
+ /* Clear adapter stopped flag */
+ hw->adapter_stopped = false;
+
+out:
+ return ret_val;
+}
+
+/**
+ * ixgbe_start_hw_gen2 - Init sequence for common device family
+ * @hw: pointer to hw structure
+ *
+ * Performs the init sequence common to the second generation
+ * of 10 GbE devices.
+ * Devices in the second generation:
+ * 82599
+ * X540
+ **/
+s32 ixgbe_start_hw_gen2(struct ixgbe_hw *hw)
+{
+ u32 i;
+ u32 regval;
+
+ /* Clear the rate limiters */
+ for (i = 0; i < hw->mac.max_tx_queues; i++) {
+ IXGBE_WRITE_REG(hw, IXGBE_RTTDQSEL, i);
+ IXGBE_WRITE_REG(hw, IXGBE_RTTBCNRC, 0);
+ }
+ IXGBE_WRITE_FLUSH(hw);
+
+ /* Disable relaxed ordering */
+ for (i = 0; i < hw->mac.max_tx_queues; i++) {
+ regval = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
+ regval &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
+ IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), regval);
+ }
+
+ for (i = 0; i < hw->mac.max_rx_queues; i++) {
+ regval = IXGBE_READ_REG(hw, IXGBE_DCA_RXCTRL(i));
+ regval &= ~(IXGBE_DCA_RXCTRL_DATA_WRO_EN |
+ IXGBE_DCA_RXCTRL_HEAD_WRO_EN);
+ IXGBE_WRITE_REG(hw, IXGBE_DCA_RXCTRL(i), regval);
+ }
+
+ return 0;
+}
+
+/**
+ * ixgbe_init_hw_generic - Generic hardware initialization
+ * @hw: pointer to hardware structure
+ *
+ * Initialize the hardware by resetting the hardware, filling the bus info
+ * structure and media type, clears all on chip counters, initializes receive
+ * address registers, multicast table, VLAN filter table, calls routine to set
+ * up link and flow control settings, and leaves transmit and receive units
+ * disabled and uninitialized
+ **/
+s32 ixgbe_init_hw_generic(struct ixgbe_hw *hw)
+{
+ s32 status;
+
+ /* Reset the hardware */
+ status = hw->mac.ops.reset_hw(hw);
+
+ if (status == 0) {
+ /* Start the HW */
+ status = hw->mac.ops.start_hw(hw);
+ }
+
+ return status;
+}
+
+/**
+ * ixgbe_clear_hw_cntrs_generic - Generic clear hardware counters
+ * @hw: pointer to hardware structure
+ *
+ * Clears all hardware statistics counters by reading them from the hardware
+ * Statistics counters are clear on read.
+ **/
+s32 ixgbe_clear_hw_cntrs_generic(struct ixgbe_hw *hw)
+{
+ u16 i = 0;
+
+ IXGBE_READ_REG(hw, IXGBE_CRCERRS);
+ IXGBE_READ_REG(hw, IXGBE_ILLERRC);
+ IXGBE_READ_REG(hw, IXGBE_ERRBC);
+ IXGBE_READ_REG(hw, IXGBE_MSPDC);
+ for (i = 0; i < 8; i++)
+ IXGBE_READ_REG(hw, IXGBE_MPC(i));
+
+ IXGBE_READ_REG(hw, IXGBE_MLFC);
+ IXGBE_READ_REG(hw, IXGBE_MRFC);
+ IXGBE_READ_REG(hw, IXGBE_RLEC);
+ IXGBE_READ_REG(hw, IXGBE_LXONTXC);
+ IXGBE_READ_REG(hw, IXGBE_LXOFFTXC);
+ if (hw->mac.type >= ixgbe_mac_82599EB) {
+ IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
+ IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
+ } else {
+ IXGBE_READ_REG(hw, IXGBE_LXONRXC);
+ IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
+ }
+
+ for (i = 0; i < 8; i++) {
+ IXGBE_READ_REG(hw, IXGBE_PXONTXC(i));
+ IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(i));
+ if (hw->mac.type >= ixgbe_mac_82599EB) {
+ IXGBE_READ_REG(hw, IXGBE_PXONRXCNT(i));
+ IXGBE_READ_REG(hw, IXGBE_PXOFFRXCNT(i));
+ } else {
+ IXGBE_READ_REG(hw, IXGBE_PXONRXC(i));
+ IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i));
+ }
+ }
+ if (hw->mac.type >= ixgbe_mac_82599EB)
+ for (i = 0; i < 8; i++)
+ IXGBE_READ_REG(hw, IXGBE_PXON2OFFCNT(i));
+ IXGBE_READ_REG(hw, IXGBE_PRC64);
+ IXGBE_READ_REG(hw, IXGBE_PRC127);
+ IXGBE_READ_REG(hw, IXGBE_PRC255);
+ IXGBE_READ_REG(hw, IXGBE_PRC511);
+ IXGBE_READ_REG(hw, IXGBE_PRC1023);
+ IXGBE_READ_REG(hw, IXGBE_PRC1522);
+ IXGBE_READ_REG(hw, IXGBE_GPRC);
+ IXGBE_READ_REG(hw, IXGBE_BPRC);
+ IXGBE_READ_REG(hw, IXGBE_MPRC);
+ IXGBE_READ_REG(hw, IXGBE_GPTC);
+ IXGBE_READ_REG(hw, IXGBE_GORCL);
+ IXGBE_READ_REG(hw, IXGBE_GORCH);
+ IXGBE_READ_REG(hw, IXGBE_GOTCL);
+ IXGBE_READ_REG(hw, IXGBE_GOTCH);
+ if (hw->mac.type == ixgbe_mac_82598EB)
+ for (i = 0; i < 8; i++)
+ IXGBE_READ_REG(hw, IXGBE_RNBC(i));
+ IXGBE_READ_REG(hw, IXGBE_RUC);
+ IXGBE_READ_REG(hw, IXGBE_RFC);
+ IXGBE_READ_REG(hw, IXGBE_ROC);
+ IXGBE_READ_REG(hw, IXGBE_RJC);
+ IXGBE_READ_REG(hw, IXGBE_MNGPRC);
+ IXGBE_READ_REG(hw, IXGBE_MNGPDC);
+ IXGBE_READ_REG(hw, IXGBE_MNGPTC);
+ IXGBE_READ_REG(hw, IXGBE_TORL);
+ IXGBE_READ_REG(hw, IXGBE_TORH);
+ IXGBE_READ_REG(hw, IXGBE_TPR);
+ IXGBE_READ_REG(hw, IXGBE_TPT);
+ IXGBE_READ_REG(hw, IXGBE_PTC64);
+ IXGBE_READ_REG(hw, IXGBE_PTC127);
+ IXGBE_READ_REG(hw, IXGBE_PTC255);
+ IXGBE_READ_REG(hw, IXGBE_PTC511);
+ IXGBE_READ_REG(hw, IXGBE_PTC1023);
+ IXGBE_READ_REG(hw, IXGBE_PTC1522);
+ IXGBE_READ_REG(hw, IXGBE_MPTC);
+ IXGBE_READ_REG(hw, IXGBE_BPTC);
+ for (i = 0; i < 16; i++) {
+ IXGBE_READ_REG(hw, IXGBE_QPRC(i));
+ IXGBE_READ_REG(hw, IXGBE_QPTC(i));
+ if (hw->mac.type >= ixgbe_mac_82599EB) {
+ IXGBE_READ_REG(hw, IXGBE_QBRC_L(i));
+ IXGBE_READ_REG(hw, IXGBE_QBRC_H(i));
+ IXGBE_READ_REG(hw, IXGBE_QBTC_L(i));
+ IXGBE_READ_REG(hw, IXGBE_QBTC_H(i));
+ IXGBE_READ_REG(hw, IXGBE_QPRDC(i));
+ } else {
+ IXGBE_READ_REG(hw, IXGBE_QBRC(i));
+ IXGBE_READ_REG(hw, IXGBE_QBTC(i));
+ }
+ }
+
+ if (hw->mac.type == ixgbe_mac_X540) {
+ if (hw->phy.id == 0)
+ ixgbe_identify_phy(hw);
+ hw->phy.ops.read_reg(hw, IXGBE_PCRC8ECL,
+ IXGBE_MDIO_PCS_DEV_TYPE, &i);
+ hw->phy.ops.read_reg(hw, IXGBE_PCRC8ECH,
+ IXGBE_MDIO_PCS_DEV_TYPE, &i);
+ hw->phy.ops.read_reg(hw, IXGBE_LDPCECL,
+ IXGBE_MDIO_PCS_DEV_TYPE, &i);
+ hw->phy.ops.read_reg(hw, IXGBE_LDPCECH,
+ IXGBE_MDIO_PCS_DEV_TYPE, &i);
+ }
+
+ return 0;
+}
+
+/**
+ * ixgbe_read_pba_string_generic - Reads part number string from EEPROM
+ * @hw: pointer to hardware structure
+ * @pba_num: stores the part number string from the EEPROM
+ * @pba_num_size: part number string buffer length
+ *
+ * Reads the part number string from the EEPROM.
+ **/
+s32 ixgbe_read_pba_string_generic(struct ixgbe_hw *hw, u8 *pba_num,
+ u32 pba_num_size)
+{
+ s32 ret_val;
+ u16 data;
+ u16 pba_ptr;
+ u16 offset;
+ u16 length;
+
+ if (pba_num == NULL) {
+ hw_dbg(hw, "PBA string buffer was null\n");
+ return IXGBE_ERR_INVALID_ARGUMENT;
+ }
+
+ ret_val = hw->eeprom.ops.read(hw, IXGBE_PBANUM0_PTR, &data);
+ if (ret_val) {
+ hw_dbg(hw, "NVM Read Error\n");
+ return ret_val;
+ }
+
+ ret_val = hw->eeprom.ops.read(hw, IXGBE_PBANUM1_PTR, &pba_ptr);
+ if (ret_val) {
+ hw_dbg(hw, "NVM Read Error\n");
+ return ret_val;
+ }
+
+ /*
+ * if data is not ptr guard the PBA must be in legacy format which
+ * means pba_ptr is actually our second data word for the PBA number
+ * and we can decode it into an ascii string
+ */
+ if (data != IXGBE_PBANUM_PTR_GUARD) {
+ hw_dbg(hw, "NVM PBA number is not stored as string\n");
+
+ /* we will need 11 characters to store the PBA */
+ if (pba_num_size < 11) {
+ hw_dbg(hw, "PBA string buffer too small\n");
+ return IXGBE_ERR_NO_SPACE;
+ }
+
+ /* extract hex string from data and pba_ptr */
+ pba_num[0] = (data >> 12) & 0xF;
+ pba_num[1] = (data >> 8) & 0xF;
+ pba_num[2] = (data >> 4) & 0xF;
+ pba_num[3] = data & 0xF;
+ pba_num[4] = (pba_ptr >> 12) & 0xF;
+ pba_num[5] = (pba_ptr >> 8) & 0xF;
+ pba_num[6] = '-';
+ pba_num[7] = 0;
+ pba_num[8] = (pba_ptr >> 4) & 0xF;
+ pba_num[9] = pba_ptr & 0xF;
+
+ /* put a null character on the end of our string */
+ pba_num[10] = '\0';
+
+ /* switch all the data but the '-' to hex char */
+ for (offset = 0; offset < 10; offset++) {
+ if (pba_num[offset] < 0xA)
+ pba_num[offset] += '0';
+ else if (pba_num[offset] < 0x10)
+ pba_num[offset] += 'A' - 0xA;
+ }
+
+ return 0;
+ }
+
+ ret_val = hw->eeprom.ops.read(hw, pba_ptr, &length);
+ if (ret_val) {
+ hw_dbg(hw, "NVM Read Error\n");
+ return ret_val;
+ }
+
+ if (length == 0xFFFF || length == 0) {
+ hw_dbg(hw, "NVM PBA number section invalid length\n");
+ return IXGBE_ERR_PBA_SECTION;
+ }
+
+ /* check if pba_num buffer is big enough */
+ if (pba_num_size < (((u32)length * 2) - 1)) {
+ hw_dbg(hw, "PBA string buffer too small\n");
+ return IXGBE_ERR_NO_SPACE;
+ }
+
+ /* trim pba length from start of string */
+ pba_ptr++;
+ length--;
+
+ for (offset = 0; offset < length; offset++) {
+ ret_val = hw->eeprom.ops.read(hw, pba_ptr + offset, &data);
+ if (ret_val) {
+ hw_dbg(hw, "NVM Read Error\n");
+ return ret_val;
+ }
+ pba_num[offset * 2] = (u8)(data >> 8);
+ pba_num[(offset * 2) + 1] = (u8)(data & 0xFF);
+ }
+ pba_num[offset * 2] = '\0';
+
+ return 0;
+}
+
+/**
+ * ixgbe_get_mac_addr_generic - Generic get MAC address
+ * @hw: pointer to hardware structure
+ * @mac_addr: Adapter MAC address
+ *
+ * Reads the adapter's MAC address from first Receive Address Register (RAR0)
+ * A reset of the adapter must be performed prior to calling this function
+ * in order for the MAC address to have been loaded from the EEPROM into RAR0
+ **/
+s32 ixgbe_get_mac_addr_generic(struct ixgbe_hw *hw, u8 *mac_addr)
+{
+ u32 rar_high;
+ u32 rar_low;
+ u16 i;
+
+ rar_high = IXGBE_READ_REG(hw, IXGBE_RAH(0));
+ rar_low = IXGBE_READ_REG(hw, IXGBE_RAL(0));
+
+ for (i = 0; i < 4; i++)
+ mac_addr[i] = (u8)(rar_low >> (i*8));
+
+ for (i = 0; i < 2; i++)
+ mac_addr[i+4] = (u8)(rar_high >> (i*8));
+
+ return 0;
+}
+
+/**
+ * ixgbe_get_bus_info_generic - Generic set PCI bus info
+ * @hw: pointer to hardware structure
+ *
+ * Sets the PCI bus info (speed, width, type) within the ixgbe_hw structure
+ **/
+s32 ixgbe_get_bus_info_generic(struct ixgbe_hw *hw)
+{
+ struct ixgbe_mac_info *mac = &hw->mac;
+ u16 link_status;
+
+ hw->bus.type = ixgbe_bus_type_pci_express;
+
+ /* Get the negotiated link width and speed from PCI config space */
+ link_status = IXGBE_READ_PCIE_WORD(hw, IXGBE_PCI_LINK_STATUS);
+
+ switch (link_status & IXGBE_PCI_LINK_WIDTH) {
+ case IXGBE_PCI_LINK_WIDTH_1:
+ hw->bus.width = ixgbe_bus_width_pcie_x1;
+ break;
+ case IXGBE_PCI_LINK_WIDTH_2:
+ hw->bus.width = ixgbe_bus_width_pcie_x2;
+ break;
+ case IXGBE_PCI_LINK_WIDTH_4:
+ hw->bus.width = ixgbe_bus_width_pcie_x4;
+ break;
+ case IXGBE_PCI_LINK_WIDTH_8:
+ hw->bus.width = ixgbe_bus_width_pcie_x8;
+ break;
+ default:
+ hw->bus.width = ixgbe_bus_width_unknown;
+ break;
+ }
+
+ switch (link_status & IXGBE_PCI_LINK_SPEED) {
+ case IXGBE_PCI_LINK_SPEED_2500:
+ hw->bus.speed = ixgbe_bus_speed_2500;
+ break;
+ case IXGBE_PCI_LINK_SPEED_5000:
+ hw->bus.speed = ixgbe_bus_speed_5000;
+ break;
+ case IXGBE_PCI_LINK_SPEED_8000:
+ hw->bus.speed = ixgbe_bus_speed_8000;
+ break;
+ default:
+ hw->bus.speed = ixgbe_bus_speed_unknown;
+ break;
+ }
+
+ mac->ops.set_lan_id(hw);
+
+ return 0;
+}
+
+/**
+ * ixgbe_set_lan_id_multi_port_pcie - Set LAN id for PCIe multiple port devices
+ * @hw: pointer to the HW structure
+ *
+ * Determines the LAN function id by reading memory-mapped registers
+ * and swaps the port value if requested.
+ **/
+void ixgbe_set_lan_id_multi_port_pcie(struct ixgbe_hw *hw)
+{
+ struct ixgbe_bus_info *bus = &hw->bus;
+ u32 reg;
+
+ reg = IXGBE_READ_REG(hw, IXGBE_STATUS);
+ bus->func = (reg & IXGBE_STATUS_LAN_ID) >> IXGBE_STATUS_LAN_ID_SHIFT;
+ bus->lan_id = bus->func;
+
+ /* check for a port swap */
+ reg = IXGBE_READ_REG(hw, IXGBE_FACTPS);
+ if (reg & IXGBE_FACTPS_LFS)
+ bus->func ^= 0x1;
+}
+
+/**
+ * ixgbe_stop_adapter_generic - Generic stop Tx/Rx units
+ * @hw: pointer to hardware structure
+ *
+ * Sets the adapter_stopped flag within ixgbe_hw struct. Clears interrupts,
+ * disables transmit and receive units. The adapter_stopped flag is used by
+ * the shared code and drivers to determine if the adapter is in a stopped
+ * state and should not touch the hardware.
+ **/
+s32 ixgbe_stop_adapter_generic(struct ixgbe_hw *hw)
+{
+ u32 reg_val;
+ u16 i;
+
+ /*
+ * Set the adapter_stopped flag so other driver functions stop touching
+ * the hardware
+ */
+ hw->adapter_stopped = true;
+
+ /* Disable the receive unit */
+ IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, 0);
+
+ /* Clear interrupt mask to stop interrupts from being generated */
+ IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_IRQ_CLEAR_MASK);
+
+ /* Clear any pending interrupts, flush previous writes */
+ IXGBE_READ_REG(hw, IXGBE_EICR);
+
+ /* Disable the transmit unit. Each queue must be disabled. */
+ for (i = 0; i < hw->mac.max_tx_queues; i++)
+ IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), IXGBE_TXDCTL_SWFLSH);
+
+ /* Disable the receive unit by stopping each queue */
+ for (i = 0; i < hw->mac.max_rx_queues; i++) {
+ reg_val = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
+ reg_val &= ~IXGBE_RXDCTL_ENABLE;
+ reg_val |= IXGBE_RXDCTL_SWFLSH;
+ IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), reg_val);
+ }
+
+ /* flush all queues disables */
+ IXGBE_WRITE_FLUSH(hw);
+ msleep(2);
+
+ /*
+ * Prevent the PCI-E bus from from hanging by disabling PCI-E master
+ * access and verify no pending requests
+ */
+ return ixgbe_disable_pcie_master(hw);
+}
+
+/**
+ * ixgbe_led_on_generic - Turns on the software controllable LEDs.
+ * @hw: pointer to hardware structure
+ * @index: led number to turn on
+ **/
+s32 ixgbe_led_on_generic(struct ixgbe_hw *hw, u32 index)
+{
+ u32 led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL);
+
+ /* To turn on the LED, set mode to ON. */
+ led_reg &= ~IXGBE_LED_MODE_MASK(index);
+ led_reg |= IXGBE_LED_ON << IXGBE_LED_MODE_SHIFT(index);
+ IXGBE_WRITE_REG(hw, IXGBE_LEDCTL, led_reg);
+ IXGBE_WRITE_FLUSH(hw);
+
+ return 0;
+}
+
+/**
+ * ixgbe_led_off_generic - Turns off the software controllable LEDs.
+ * @hw: pointer to hardware structure
+ * @index: led number to turn off
+ **/
+s32 ixgbe_led_off_generic(struct ixgbe_hw *hw, u32 index)
+{
+ u32 led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL);
+
+ /* To turn off the LED, set mode to OFF. */
+ led_reg &= ~IXGBE_LED_MODE_MASK(index);
+ led_reg |= IXGBE_LED_OFF << IXGBE_LED_MODE_SHIFT(index);
+ IXGBE_WRITE_REG(hw, IXGBE_LEDCTL, led_reg);
+ IXGBE_WRITE_FLUSH(hw);
+
+ return 0;
+}
+
+/**
+ * ixgbe_init_eeprom_params_generic - Initialize EEPROM params
+ * @hw: pointer to hardware structure
+ *
+ * Initializes the EEPROM parameters ixgbe_eeprom_info within the
+ * ixgbe_hw struct in order to set up EEPROM access.
+ **/
+s32 ixgbe_init_eeprom_params_generic(struct ixgbe_hw *hw)
+{
+ struct ixgbe_eeprom_info *eeprom = &hw->eeprom;
+ u32 eec;
+ u16 eeprom_size;
+
+ if (eeprom->type == ixgbe_eeprom_uninitialized) {
+ eeprom->type = ixgbe_eeprom_none;
+ /* Set default semaphore delay to 10ms which is a well
+ * tested value */
+ eeprom->semaphore_delay = 10;
+ /* Clear EEPROM page size, it will be initialized as needed */
+ eeprom->word_page_size = 0;
+
+ /*
+ * Check for EEPROM present first.
+ * If not present leave as none
+ */
+ eec = IXGBE_READ_REG(hw, IXGBE_EEC);
+ if (eec & IXGBE_EEC_PRES) {
+ eeprom->type = ixgbe_eeprom_spi;
+
+ /*
+ * SPI EEPROM is assumed here. This code would need to
+ * change if a future EEPROM is not SPI.
+ */
+ eeprom_size = (u16)((eec & IXGBE_EEC_SIZE) >>
+ IXGBE_EEC_SIZE_SHIFT);
+ eeprom->word_size = 1 << (eeprom_size +
+ IXGBE_EEPROM_WORD_SIZE_SHIFT);
+ }
+
+ if (eec & IXGBE_EEC_ADDR_SIZE)
+ eeprom->address_bits = 16;
+ else
+ eeprom->address_bits = 8;
+ hw_dbg(hw, "Eeprom params: type = %d, size = %d, address bits: "
+ "%d\n", eeprom->type, eeprom->word_size,
+ eeprom->address_bits);
+ }
+
+ return 0;
+}
+
+/**
+ * ixgbe_write_eeprom_buffer_bit_bang_generic - Write EEPROM using bit-bang
+ * @hw: pointer to hardware structure
+ * @offset: offset within the EEPROM to write
+ * @words: number of word(s)
+ * @data: 16 bit word(s) to write to EEPROM
+ *
+ * Reads 16 bit word(s) from EEPROM through bit-bang method
+ **/
+s32 ixgbe_write_eeprom_buffer_bit_bang_generic(struct ixgbe_hw *hw, u16 offset,
+ u16 words, u16 *data)
+{
+ s32 status = 0;
+ u16 i, count;
+
+ hw->eeprom.ops.init_params(hw);
+
+ if (words == 0) {
+ status = IXGBE_ERR_INVALID_ARGUMENT;
+ goto out;
+ }
+
+ if (offset + words > hw->eeprom.word_size) {
+ status = IXGBE_ERR_EEPROM;
+ goto out;
+ }
+
+ /*
+ * The EEPROM page size cannot be queried from the chip. We do lazy
+ * initialization. It is worth to do that when we write large buffer.
+ */
+ if ((hw->eeprom.word_page_size == 0) &&
+ (words > IXGBE_EEPROM_PAGE_SIZE_MAX))
+ ixgbe_detect_eeprom_page_size_generic(hw, offset);
+
+ /*
+ * We cannot hold synchronization semaphores for too long
+ * to avoid other entity starvation. However it is more efficient
+ * to read in bursts than synchronizing access for each word.
+ */
+ for (i = 0; i < words; i += IXGBE_EEPROM_RD_BUFFER_MAX_COUNT) {
+ count = (words - i) / IXGBE_EEPROM_RD_BUFFER_MAX_COUNT > 0 ?
+ IXGBE_EEPROM_RD_BUFFER_MAX_COUNT : (words - i);
+ status = ixgbe_write_eeprom_buffer_bit_bang(hw, offset + i,
+ count, &data[i]);
+
+ if (status != 0)
+ break;
+ }
+
+out:
+ return status;
+}
+
+/**
+ * ixgbe_write_eeprom_buffer_bit_bang - Writes 16 bit word(s) to EEPROM
+ * @hw: pointer to hardware structure
+ * @offset: offset within the EEPROM to be written to
+ * @words: number of word(s)
+ * @data: 16 bit word(s) to be written to the EEPROM
+ *
+ * If ixgbe_eeprom_update_checksum is not called after this function, the
+ * EEPROM will most likely contain an invalid checksum.
+ **/
+static s32 ixgbe_write_eeprom_buffer_bit_bang(struct ixgbe_hw *hw, u16 offset,
+ u16 words, u16 *data)
+{
+ s32 status;
+ u16 word;
+ u16 page_size;
+ u16 i;
+ u8 write_opcode = IXGBE_EEPROM_WRITE_OPCODE_SPI;
+
+ /* Prepare the EEPROM for writing */
+ status = ixgbe_acquire_eeprom(hw);
+
+ if (status == 0) {
+ if (ixgbe_ready_eeprom(hw) != 0) {
+ ixgbe_release_eeprom(hw);
+ status = IXGBE_ERR_EEPROM;
+ }
+ }
+
+ if (status == 0) {
+ for (i = 0; i < words; i++) {
+ ixgbe_standby_eeprom(hw);
+
+ /* Send the WRITE ENABLE command (8 bit opcode ) */
+ ixgbe_shift_out_eeprom_bits(hw,
+ IXGBE_EEPROM_WREN_OPCODE_SPI,
+ IXGBE_EEPROM_OPCODE_BITS);
+
+ ixgbe_standby_eeprom(hw);
+
+ /*
+ * Some SPI eeproms use the 8th address bit embedded
+ * in the opcode
+ */
+ if ((hw->eeprom.address_bits == 8) &&
+ ((offset + i) >= 128))
+ write_opcode |= IXGBE_EEPROM_A8_OPCODE_SPI;
+
+ /* Send the Write command (8-bit opcode + addr) */
+ ixgbe_shift_out_eeprom_bits(hw, write_opcode,
+ IXGBE_EEPROM_OPCODE_BITS);
+ ixgbe_shift_out_eeprom_bits(hw, (u16)((offset + i) * 2),
+ hw->eeprom.address_bits);
+
+ page_size = hw->eeprom.word_page_size;
+
+ /* Send the data in burst via SPI*/
+ do {
+ word = data[i];
+ word = (word >> 8) | (word << 8);
+ ixgbe_shift_out_eeprom_bits(hw, word, 16);
+
+ if (page_size == 0)
+ break;
+
+ /* do not wrap around page */
+ if (((offset + i) & (page_size - 1)) ==
+ (page_size - 1))
+ break;
+ } while (++i < words);
+
+ ixgbe_standby_eeprom(hw);
+ msleep(10);
+ }
+ /* Done with writing - release the EEPROM */
+ ixgbe_release_eeprom(hw);
+ }
+
+ return status;
+}
+
+/**
+ * ixgbe_write_eeprom_generic - Writes 16 bit value to EEPROM
+ * @hw: pointer to hardware structure
+ * @offset: offset within the EEPROM to be written to
+ * @data: 16 bit word to be written to the EEPROM
+ *
+ * If ixgbe_eeprom_update_checksum is not called after this function, the
+ * EEPROM will most likely contain an invalid checksum.
+ **/
+s32 ixgbe_write_eeprom_generic(struct ixgbe_hw *hw, u16 offset, u16 data)
+{
+ s32 status;
+
+ hw->eeprom.ops.init_params(hw);
+
+ if (offset >= hw->eeprom.word_size) {
+ status = IXGBE_ERR_EEPROM;
+ goto out;
+ }
+
+ status = ixgbe_write_eeprom_buffer_bit_bang(hw, offset, 1, &data);
+
+out:
+ return status;
+}
+
+/**
+ * ixgbe_read_eeprom_buffer_bit_bang_generic - Read EEPROM using bit-bang
+ * @hw: pointer to hardware structure
+ * @offset: offset within the EEPROM to be read
+ * @data: read 16 bit words(s) from EEPROM
+ * @words: number of word(s)
+ *
+ * Reads 16 bit word(s) from EEPROM through bit-bang method
+ **/
+s32 ixgbe_read_eeprom_buffer_bit_bang_generic(struct ixgbe_hw *hw, u16 offset,
+ u16 words, u16 *data)
+{
+ s32 status = 0;
+ u16 i, count;
+
+ hw->eeprom.ops.init_params(hw);
+
+ if (words == 0) {
+ status = IXGBE_ERR_INVALID_ARGUMENT;
+ goto out;
+ }
+
+ if (offset + words > hw->eeprom.word_size) {
+ status = IXGBE_ERR_EEPROM;
+ goto out;
+ }
+
+ /*
+ * We cannot hold synchronization semaphores for too long
+ * to avoid other entity starvation. However it is more efficient
+ * to read in bursts than synchronizing access for each word.
+ */
+ for (i = 0; i < words; i += IXGBE_EEPROM_RD_BUFFER_MAX_COUNT) {
+ count = (words - i) / IXGBE_EEPROM_RD_BUFFER_MAX_COUNT > 0 ?
+ IXGBE_EEPROM_RD_BUFFER_MAX_COUNT : (words - i);
+
+ status = ixgbe_read_eeprom_buffer_bit_bang(hw, offset + i,
+ count, &data[i]);
+
+ if (status != 0)
+ break;
+ }
+
+out:
+ return status;
+}
+
+/**
+ * ixgbe_read_eeprom_buffer_bit_bang - Read EEPROM using bit-bang
+ * @hw: pointer to hardware structure
+ * @offset: offset within the EEPROM to be read
+ * @words: number of word(s)
+ * @data: read 16 bit word(s) from EEPROM
+ *
+ * Reads 16 bit word(s) from EEPROM through bit-bang method
+ **/
+static s32 ixgbe_read_eeprom_buffer_bit_bang(struct ixgbe_hw *hw, u16 offset,
+ u16 words, u16 *data)
+{
+ s32 status;
+ u16 word_in;
+ u8 read_opcode = IXGBE_EEPROM_READ_OPCODE_SPI;
+ u16 i;
+
+ /* Prepare the EEPROM for reading */
+ status = ixgbe_acquire_eeprom(hw);
+
+ if (status == 0) {
+ if (ixgbe_ready_eeprom(hw) != 0) {
+ ixgbe_release_eeprom(hw);
+ status = IXGBE_ERR_EEPROM;
+ }
+ }
+
+ if (status == 0) {
+ for (i = 0; i < words; i++) {
+ ixgbe_standby_eeprom(hw);
+ /*
+ * Some SPI eeproms use the 8th address bit embedded
+ * in the opcode
+ */
+ if ((hw->eeprom.address_bits == 8) &&
+ ((offset + i) >= 128))
+ read_opcode |= IXGBE_EEPROM_A8_OPCODE_SPI;
+
+ /* Send the READ command (opcode + addr) */
+ ixgbe_shift_out_eeprom_bits(hw, read_opcode,
+ IXGBE_EEPROM_OPCODE_BITS);
+ ixgbe_shift_out_eeprom_bits(hw, (u16)((offset + i) * 2),
+ hw->eeprom.address_bits);
+
+ /* Read the data. */
+ word_in = ixgbe_shift_in_eeprom_bits(hw, 16);
+ data[i] = (word_in >> 8) | (word_in << 8);
+ }
+
+ /* End this read operation */
+ ixgbe_release_eeprom(hw);
+ }
+
+ return status;
+}
+
+/**
+ * ixgbe_read_eeprom_bit_bang_generic - Read EEPROM word using bit-bang
+ * @hw: pointer to hardware structure
+ * @offset: offset within the EEPROM to be read
+ * @data: read 16 bit value from EEPROM
+ *
+ * Reads 16 bit value from EEPROM through bit-bang method
+ **/
+s32 ixgbe_read_eeprom_bit_bang_generic(struct ixgbe_hw *hw, u16 offset,
+ u16 *data)
+{
+ s32 status;
+
+ hw->eeprom.ops.init_params(hw);
+
+ if (offset >= hw->eeprom.word_size) {
+ status = IXGBE_ERR_EEPROM;
+ goto out;
+ }
+
+ status = ixgbe_read_eeprom_buffer_bit_bang(hw, offset, 1, data);
+
+out:
+ return status;
+}
+
+/**
+ * ixgbe_read_eerd_buffer_generic - Read EEPROM word(s) using EERD
+ * @hw: pointer to hardware structure
+ * @offset: offset of word in the EEPROM to read
+ * @words: number of word(s)
+ * @data: 16 bit word(s) from the EEPROM
+ *
+ * Reads a 16 bit word(s) from the EEPROM using the EERD register.
+ **/
+s32 ixgbe_read_eerd_buffer_generic(struct ixgbe_hw *hw, u16 offset,
+ u16 words, u16 *data)
+{
+ u32 eerd;
+ s32 status = 0;
+ u32 i;
+
+ hw->eeprom.ops.init_params(hw);
+
+ if (words == 0) {
+ status = IXGBE_ERR_INVALID_ARGUMENT;
+ goto out;
+ }
+
+ if (offset >= hw->eeprom.word_size) {
+ status = IXGBE_ERR_EEPROM;
+ goto out;
+ }
+
+ for (i = 0; i < words; i++) {
+ eerd = ((offset + i) << IXGBE_EEPROM_RW_ADDR_SHIFT) +
+ IXGBE_EEPROM_RW_REG_START;
+
+ IXGBE_WRITE_REG(hw, IXGBE_EERD, eerd);
+ status = ixgbe_poll_eerd_eewr_done(hw, IXGBE_NVM_POLL_READ);
+
+ if (status == 0) {
+ data[i] = (IXGBE_READ_REG(hw, IXGBE_EERD) >>
+ IXGBE_EEPROM_RW_REG_DATA);
+ } else {
+ hw_dbg(hw, "Eeprom read timed out\n");
+ goto out;
+ }
+ }
+out:
+ return status;
+}
+
+/**
+ * ixgbe_detect_eeprom_page_size_generic - Detect EEPROM page size
+ * @hw: pointer to hardware structure
+ * @offset: offset within the EEPROM to be used as a scratch pad
+ *
+ * Discover EEPROM page size by writing marching data at given offset.
+ * This function is called only when we are writing a new large buffer
+ * at given offset so the data would be overwritten anyway.
+ **/
+static s32 ixgbe_detect_eeprom_page_size_generic(struct ixgbe_hw *hw,
+ u16 offset)
+{
+ u16 data[IXGBE_EEPROM_PAGE_SIZE_MAX];
+ s32 status = 0;
+ u16 i;
+
+ for (i = 0; i < IXGBE_EEPROM_PAGE_SIZE_MAX; i++)
+ data[i] = i;
+
+ hw->eeprom.word_page_size = IXGBE_EEPROM_PAGE_SIZE_MAX;
+ status = ixgbe_write_eeprom_buffer_bit_bang(hw, offset,
+ IXGBE_EEPROM_PAGE_SIZE_MAX, data);
+ hw->eeprom.word_page_size = 0;
+ if (status != 0)
+ goto out;
+
+ status = ixgbe_read_eeprom_buffer_bit_bang(hw, offset, 1, data);
+ if (status != 0)
+ goto out;
+
+ /*
+ * When writing in burst more than the actual page size
+ * EEPROM address wraps around current page.
+ */
+ hw->eeprom.word_page_size = IXGBE_EEPROM_PAGE_SIZE_MAX - data[0];
+
+ hw_dbg(hw, "Detected EEPROM page size = %d words.",
+ hw->eeprom.word_page_size);
+out:
+ return status;
+}
+
+/**
+ * ixgbe_read_eerd_generic - Read EEPROM word using EERD
+ * @hw: pointer to hardware structure
+ * @offset: offset of word in the EEPROM to read
+ * @data: word read from the EEPROM
+ *
+ * Reads a 16 bit word from the EEPROM using the EERD register.
+ **/
+s32 ixgbe_read_eerd_generic(struct ixgbe_hw *hw, u16 offset, u16 *data)
+{
+ return ixgbe_read_eerd_buffer_generic(hw, offset, 1, data);
+}
+
+/**
+ * ixgbe_write_eewr_buffer_generic - Write EEPROM word(s) using EEWR
+ * @hw: pointer to hardware structure
+ * @offset: offset of word in the EEPROM to write
+ * @words: number of word(s)
+ * @data: word(s) write to the EEPROM
+ *
+ * Write a 16 bit word(s) to the EEPROM using the EEWR register.
+ **/
+s32 ixgbe_write_eewr_buffer_generic(struct ixgbe_hw *hw, u16 offset,
+ u16 words, u16 *data)
+{
+ u32 eewr;
+ s32 status = 0;
+ u16 i;
+
+ hw->eeprom.ops.init_params(hw);
+
+ if (words == 0) {
+ status = IXGBE_ERR_INVALID_ARGUMENT;
+ goto out;
+ }
+
+ if (offset >= hw->eeprom.word_size) {
+ status = IXGBE_ERR_EEPROM;
+ goto out;
+ }
+
+ for (i = 0; i < words; i++) {
+ eewr = ((offset + i) << IXGBE_EEPROM_RW_ADDR_SHIFT) |
+ (data[i] << IXGBE_EEPROM_RW_REG_DATA) |
+ IXGBE_EEPROM_RW_REG_START;
+
+ status = ixgbe_poll_eerd_eewr_done(hw, IXGBE_NVM_POLL_WRITE);
+ if (status != 0) {
+ hw_dbg(hw, "Eeprom write EEWR timed out\n");
+ goto out;
+ }
+
+ IXGBE_WRITE_REG(hw, IXGBE_EEWR, eewr);
+
+ status = ixgbe_poll_eerd_eewr_done(hw, IXGBE_NVM_POLL_WRITE);
+ if (status != 0) {
+ hw_dbg(hw, "Eeprom write EEWR timed out\n");
+ goto out;
+ }
+ }
+
+out:
+ return status;
+}
+
+/**
+ * ixgbe_write_eewr_generic - Write EEPROM word using EEWR
+ * @hw: pointer to hardware structure
+ * @offset: offset of word in the EEPROM to write
+ * @data: word write to the EEPROM
+ *
+ * Write a 16 bit word to the EEPROM using the EEWR register.
+ **/
+s32 ixgbe_write_eewr_generic(struct ixgbe_hw *hw, u16 offset, u16 data)
+{
+ return ixgbe_write_eewr_buffer_generic(hw, offset, 1, &data);
+}
+
+/**
+ * ixgbe_poll_eerd_eewr_done - Poll EERD read or EEWR write status
+ * @hw: pointer to hardware structure
+ * @ee_reg: EEPROM flag for polling
+ *
+ * Polls the status bit (bit 1) of the EERD or EEWR to determine when the
+ * read or write is done respectively.
+ **/
+s32 ixgbe_poll_eerd_eewr_done(struct ixgbe_hw *hw, u32 ee_reg)
+{
+ u32 i;
+ u32 reg;
+ s32 status = IXGBE_ERR_EEPROM;
+
+ for (i = 0; i < IXGBE_EERD_EEWR_ATTEMPTS; i++) {
+ if (ee_reg == IXGBE_NVM_POLL_READ)
+ reg = IXGBE_READ_REG(hw, IXGBE_EERD);
+ else
+ reg = IXGBE_READ_REG(hw, IXGBE_EEWR);
+
+ if (reg & IXGBE_EEPROM_RW_REG_DONE) {
+ status = 0;
+ break;
+ }
+ udelay(5);
+ }
+ return status;
+}
+
+/**
+ * ixgbe_acquire_eeprom - Acquire EEPROM using bit-bang
+ * @hw: pointer to hardware structure
+ *
+ * Prepares EEPROM for access using bit-bang method. This function should
+ * be called before issuing a command to the EEPROM.
+ **/
+static s32 ixgbe_acquire_eeprom(struct ixgbe_hw *hw)
+{
+ s32 status = 0;
+ u32 eec;
+ u32 i;
+
+ if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM)
+ != 0)
+ status = IXGBE_ERR_SWFW_SYNC;
+
+ if (status == 0) {
+ eec = IXGBE_READ_REG(hw, IXGBE_EEC);
+
+ /* Request EEPROM Access */
+ eec |= IXGBE_EEC_REQ;
+ IXGBE_WRITE_REG(hw, IXGBE_EEC, eec);
+
+ for (i = 0; i < IXGBE_EEPROM_GRANT_ATTEMPTS; i++) {
+ eec = IXGBE_READ_REG(hw, IXGBE_EEC);
+ if (eec & IXGBE_EEC_GNT)
+ break;
+ udelay(5);
+ }
+
+ /* Release if grant not acquired */
+ if (!(eec & IXGBE_EEC_GNT)) {
+ eec &= ~IXGBE_EEC_REQ;
+ IXGBE_WRITE_REG(hw, IXGBE_EEC, eec);
+ hw_dbg(hw, "Could not acquire EEPROM grant\n");
+
+ hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM);
+ status = IXGBE_ERR_EEPROM;
+ }
+
+ /* Setup EEPROM for Read/Write */
+ if (status == 0) {
+ /* Clear CS and SK */
+ eec &= ~(IXGBE_EEC_CS | IXGBE_EEC_SK);
+ IXGBE_WRITE_REG(hw, IXGBE_EEC, eec);
+ IXGBE_WRITE_FLUSH(hw);
+ udelay(1);
+ }
+ }
+ return status;
+}
+
+/**
+ * ixgbe_get_eeprom_semaphore - Get hardware semaphore
+ * @hw: pointer to hardware structure
+ *
+ * Sets the hardware semaphores so EEPROM access can occur for bit-bang method
+ **/
+static s32 ixgbe_get_eeprom_semaphore(struct ixgbe_hw *hw)
+{
+ s32 status = IXGBE_ERR_EEPROM;
+ u32 timeout = 2000;
+ u32 i;
+ u32 swsm;
+
+ /* Get SMBI software semaphore between device drivers first */
+ for (i = 0; i < timeout; i++) {
+ /*
+ * If the SMBI bit is 0 when we read it, then the bit will be
+ * set and we have the semaphore
+ */
+ swsm = IXGBE_READ_REG(hw, IXGBE_SWSM);
+ if (!(swsm & IXGBE_SWSM_SMBI)) {
+ status = 0;
+ break;
+ }
+ udelay(50);
+ }
+
+ if (i == timeout) {
+ hw_dbg(hw, "Driver can't access the Eeprom - SMBI Semaphore "
+ "not granted.\n");
+ /*
+ * this release is particularly important because our attempts
+ * above to get the semaphore may have succeeded, and if there
+ * was a timeout, we should unconditionally clear the semaphore
+ * bits to free the driver to make progress
+ */
+ ixgbe_release_eeprom_semaphore(hw);
+
+ udelay(50);
+ /*
+ * one last try
+ * If the SMBI bit is 0 when we read it, then the bit will be
+ * set and we have the semaphore
+ */
+ swsm = IXGBE_READ_REG(hw, IXGBE_SWSM);
+ if (!(swsm & IXGBE_SWSM_SMBI))
+ status = 0;
+ }
+
+ /* Now get the semaphore between SW/FW through the SWESMBI bit */
+ if (status == 0) {
+ for (i = 0; i < timeout; i++) {
+ swsm = IXGBE_READ_REG(hw, IXGBE_SWSM);
+
+ /* Set the SW EEPROM semaphore bit to request access */
+ swsm |= IXGBE_SWSM_SWESMBI;
+ IXGBE_WRITE_REG(hw, IXGBE_SWSM, swsm);
+
+ /*
+ * If we set the bit successfully then we got the
+ * semaphore.
+ */
+ swsm = IXGBE_READ_REG(hw, IXGBE_SWSM);
+ if (swsm & IXGBE_SWSM_SWESMBI)
+ break;
+
+ udelay(50);
+ }
+
+ /*
+ * Release semaphores and return error if SW EEPROM semaphore
+ * was not granted because we don't have access to the EEPROM
+ */
+ if (i >= timeout) {
+ hw_dbg(hw, "SWESMBI Software EEPROM semaphore "
+ "not granted.\n");
+ ixgbe_release_eeprom_semaphore(hw);
+ status = IXGBE_ERR_EEPROM;
+ }
+ } else {
+ hw_dbg(hw, "Software semaphore SMBI between device drivers "
+ "not granted.\n");
+ }
+
+ return status;
+}
+
+/**
+ * ixgbe_release_eeprom_semaphore - Release hardware semaphore
+ * @hw: pointer to hardware structure
+ *
+ * This function clears hardware semaphore bits.
+ **/
+static void ixgbe_release_eeprom_semaphore(struct ixgbe_hw *hw)
+{
+ u32 swsm;
+
+ swsm = IXGBE_READ_REG(hw, IXGBE_SWSM);
+
+ /* Release both semaphores by writing 0 to the bits SWESMBI and SMBI */
+ swsm &= ~(IXGBE_SWSM_SWESMBI | IXGBE_SWSM_SMBI);
+ IXGBE_WRITE_REG(hw, IXGBE_SWSM, swsm);
+ IXGBE_WRITE_FLUSH(hw);
+}
+
+/**
+ * ixgbe_ready_eeprom - Polls for EEPROM ready
+ * @hw: pointer to hardware structure
+ **/
+static s32 ixgbe_ready_eeprom(struct ixgbe_hw *hw)
+{
+ s32 status = 0;
+ u16 i;
+ u8 spi_stat_reg;
+
+ /*
+ * Read "Status Register" repeatedly until the LSB is cleared. The
+ * EEPROM will signal that the command has been completed by clearing
+ * bit 0 of the internal status register. If it's not cleared within
+ * 5 milliseconds, then error out.
+ */
+ for (i = 0; i < IXGBE_EEPROM_MAX_RETRY_SPI; i += 5) {
+ ixgbe_shift_out_eeprom_bits(hw, IXGBE_EEPROM_RDSR_OPCODE_SPI,
+ IXGBE_EEPROM_OPCODE_BITS);
+ spi_stat_reg = (u8)ixgbe_shift_in_eeprom_bits(hw, 8);
+ if (!(spi_stat_reg & IXGBE_EEPROM_STATUS_RDY_SPI))
+ break;
+
+ udelay(5);
+ ixgbe_standby_eeprom(hw);
+ };
+
+ /*
+ * On some parts, SPI write time could vary from 0-20mSec on 3.3V
+ * devices (and only 0-5mSec on 5V devices)
+ */
+ if (i >= IXGBE_EEPROM_MAX_RETRY_SPI) {
+ hw_dbg(hw, "SPI EEPROM Status error\n");
+ status = IXGBE_ERR_EEPROM;
+ }
+
+ return status;
+}
+
+/**
+ * ixgbe_standby_eeprom - Returns EEPROM to a "standby" state
+ * @hw: pointer to hardware structure
+ **/
+static void ixgbe_standby_eeprom(struct ixgbe_hw *hw)
+{
+ u32 eec;
+
+ eec = IXGBE_READ_REG(hw, IXGBE_EEC);
+
+ /* Toggle CS to flush commands */
+ eec |= IXGBE_EEC_CS;
+ IXGBE_WRITE_REG(hw, IXGBE_EEC, eec);
+ IXGBE_WRITE_FLUSH(hw);
+ udelay(1);
+ eec &= ~IXGBE_EEC_CS;
+ IXGBE_WRITE_REG(hw, IXGBE_EEC, eec);
+ IXGBE_WRITE_FLUSH(hw);
+ udelay(1);
+}
+
+/**
+ * ixgbe_shift_out_eeprom_bits - Shift data bits out to the EEPROM.
+ * @hw: pointer to hardware structure
+ * @data: data to send to the EEPROM
+ * @count: number of bits to shift out
+ **/
+static void ixgbe_shift_out_eeprom_bits(struct ixgbe_hw *hw, u16 data,
+ u16 count)
+{
+ u32 eec;
+ u32 mask;
+ u32 i;
+
+ eec = IXGBE_READ_REG(hw, IXGBE_EEC);
+
+ /*
+ * Mask is used to shift "count" bits of "data" out to the EEPROM
+ * one bit at a time. Determine the starting bit based on count
+ */
+ mask = 0x01 << (count - 1);
+
+ for (i = 0; i < count; i++) {
+ /*
+ * A "1" is shifted out to the EEPROM by setting bit "DI" to a
+ * "1", and then raising and then lowering the clock (the SK
+ * bit controls the clock input to the EEPROM). A "0" is
+ * shifted out to the EEPROM by setting "DI" to "0" and then
+ * raising and then lowering the clock.
+ */
+ if (data & mask)
+ eec |= IXGBE_EEC_DI;
+ else
+ eec &= ~IXGBE_EEC_DI;
+
+ IXGBE_WRITE_REG(hw, IXGBE_EEC, eec);
+ IXGBE_WRITE_FLUSH(hw);
+
+ udelay(1);
+
+ ixgbe_raise_eeprom_clk(hw, &eec);
+ ixgbe_lower_eeprom_clk(hw, &eec);
+
+ /*
+ * Shift mask to signify next bit of data to shift in to the
+ * EEPROM
+ */
+ mask = mask >> 1;
+ };
+
+ /* We leave the "DI" bit set to "0" when we leave this routine. */
+ eec &= ~IXGBE_EEC_DI;
+ IXGBE_WRITE_REG(hw, IXGBE_EEC, eec);
+ IXGBE_WRITE_FLUSH(hw);
+}
+
+/**
+ * ixgbe_shift_in_eeprom_bits - Shift data bits in from the EEPROM
+ * @hw: pointer to hardware structure
+ **/
+static u16 ixgbe_shift_in_eeprom_bits(struct ixgbe_hw *hw, u16 count)
+{
+ u32 eec;
+ u32 i;
+ u16 data = 0;
+
+ /*
+ * In order to read a register from the EEPROM, we need to shift
+ * 'count' bits in from the EEPROM. Bits are "shifted in" by raising
+ * the clock input to the EEPROM (setting the SK bit), and then reading
+ * the value of the "DO" bit. During this "shifting in" process the
+ * "DI" bit should always be clear.
+ */
+ eec = IXGBE_READ_REG(hw, IXGBE_EEC);
+
+ eec &= ~(IXGBE_EEC_DO | IXGBE_EEC_DI);
+
+ for (i = 0; i < count; i++) {
+ data = data << 1;
+ ixgbe_raise_eeprom_clk(hw, &eec);
+
+ eec = IXGBE_READ_REG(hw, IXGBE_EEC);
+
+ eec &= ~(IXGBE_EEC_DI);
+ if (eec & IXGBE_EEC_DO)
+ data |= 1;
+
+ ixgbe_lower_eeprom_clk(hw, &eec);
+ }
+
+ return data;
+}
+
+/**
+ * ixgbe_raise_eeprom_clk - Raises the EEPROM's clock input.
+ * @hw: pointer to hardware structure
+ * @eec: EEC register's current value
+ **/
+static void ixgbe_raise_eeprom_clk(struct ixgbe_hw *hw, u32 *eec)
+{
+ /*
+ * Raise the clock input to the EEPROM
+ * (setting the SK bit), then delay
+ */
+ *eec = *eec | IXGBE_EEC_SK;
+ IXGBE_WRITE_REG(hw, IXGBE_EEC, *eec);
+ IXGBE_WRITE_FLUSH(hw);
+ udelay(1);
+}
+
+/**
+ * ixgbe_lower_eeprom_clk - Lowers the EEPROM's clock input.
+ * @hw: pointer to hardware structure
+ * @eecd: EECD's current value
+ **/
+static void ixgbe_lower_eeprom_clk(struct ixgbe_hw *hw, u32 *eec)
+{
+ /*
+ * Lower the clock input to the EEPROM (clearing the SK bit), then
+ * delay
+ */
+ *eec = *eec & ~IXGBE_EEC_SK;
+ IXGBE_WRITE_REG(hw, IXGBE_EEC, *eec);
+ IXGBE_WRITE_FLUSH(hw);
+ udelay(1);
+}
+
+/**
+ * ixgbe_release_eeprom - Release EEPROM, release semaphores
+ * @hw: pointer to hardware structure
+ **/
+static void ixgbe_release_eeprom(struct ixgbe_hw *hw)
+{
+ u32 eec;
+
+ eec = IXGBE_READ_REG(hw, IXGBE_EEC);
+
+ eec |= IXGBE_EEC_CS; /* Pull CS high */
+ eec &= ~IXGBE_EEC_SK; /* Lower SCK */
+
+ IXGBE_WRITE_REG(hw, IXGBE_EEC, eec);
+ IXGBE_WRITE_FLUSH(hw);
+
+ udelay(1);
+
+ /* Stop requesting EEPROM access */
+ eec &= ~IXGBE_EEC_REQ;
+ IXGBE_WRITE_REG(hw, IXGBE_EEC, eec);
+
+ hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM);
+
+ /* Delay before attempt to obtain semaphore again to allow FW access */
+ msleep(hw->eeprom.semaphore_delay);
+}
+
+/**
+ * ixgbe_calc_eeprom_checksum_generic - Calculates and returns the checksum
+ * @hw: pointer to hardware structure
+ **/
+u16 ixgbe_calc_eeprom_checksum_generic(struct ixgbe_hw *hw)
+{
+ u16 i;
+ u16 j;
+ u16 checksum = 0;
+ u16 length = 0;
+ u16 pointer = 0;
+ u16 word = 0;
+
+ /* Include 0x0-0x3F in the checksum */
+ for (i = 0; i < IXGBE_EEPROM_CHECKSUM; i++) {
+ if (hw->eeprom.ops.read(hw, i, &word) != 0) {
+ hw_dbg(hw, "EEPROM read failed\n");
+ break;
+ }
+ checksum += word;
+ }
+
+ /* Include all data from pointers except for the fw pointer */
+ for (i = IXGBE_PCIE_ANALOG_PTR; i < IXGBE_FW_PTR; i++) {
+ hw->eeprom.ops.read(hw, i, &pointer);
+
+ /* Make sure the pointer seems valid */
+ if (pointer != 0xFFFF && pointer != 0) {
+ hw->eeprom.ops.read(hw, pointer, &length);
+
+ if (length != 0xFFFF && length != 0) {
+ for (j = pointer+1; j <= pointer+length; j++) {
+ hw->eeprom.ops.read(hw, j, &word);
+ checksum += word;
+ }
+ }
+ }
+ }
+
+ checksum = (u16)IXGBE_EEPROM_SUM - checksum;
+
+ return checksum;
+}
+
+/**
+ * ixgbe_validate_eeprom_checksum_generic - Validate EEPROM checksum
+ * @hw: pointer to hardware structure
+ * @checksum_val: calculated checksum
+ *
+ * Performs checksum calculation and validates the EEPROM checksum. If the
+ * caller does not need checksum_val, the value can be NULL.
+ **/
+s32 ixgbe_validate_eeprom_checksum_generic(struct ixgbe_hw *hw,
+ u16 *checksum_val)
+{
+ s32 status;
+ u16 checksum;
+ u16 read_checksum = 0;
+
+ /*
+ * Read the first word from the EEPROM. If this times out or fails, do
+ * not continue or we could be in for a very long wait while every
+ * EEPROM read fails
+ */
+ status = hw->eeprom.ops.read(hw, 0, &checksum);
+
+ if (status == 0) {
+ checksum = hw->eeprom.ops.calc_checksum(hw);
+
+ hw->eeprom.ops.read(hw, IXGBE_EEPROM_CHECKSUM, &read_checksum);
+
+ /*
+ * Verify read checksum from EEPROM is the same as
+ * calculated checksum
+ */
+ if (read_checksum != checksum)
+ status = IXGBE_ERR_EEPROM_CHECKSUM;
+
+ /* If the user cares, return the calculated checksum */
+ if (checksum_val)
+ *checksum_val = checksum;
+ } else {
+ hw_dbg(hw, "EEPROM read failed\n");
+ }
+
+ return status;
+}
+
+/**
+ * ixgbe_update_eeprom_checksum_generic - Updates the EEPROM checksum
+ * @hw: pointer to hardware structure
+ **/
+s32 ixgbe_update_eeprom_checksum_generic(struct ixgbe_hw *hw)
+{
+ s32 status;
+ u16 checksum;
+
+ /*
+ * Read the first word from the EEPROM. If this times out or fails, do
+ * not continue or we could be in for a very long wait while every
+ * EEPROM read fails
+ */
+ status = hw->eeprom.ops.read(hw, 0, &checksum);
+
+ if (status == 0) {
+ checksum = hw->eeprom.ops.calc_checksum(hw);
+ status = hw->eeprom.ops.write(hw, IXGBE_EEPROM_CHECKSUM,
+ checksum);
+ } else {
+ hw_dbg(hw, "EEPROM read failed\n");
+ }
+
+ return status;
+}
+
+/**
+ * ixgbe_validate_mac_addr - Validate MAC address
+ * @mac_addr: pointer to MAC address.
+ *
+ * Tests a MAC address to ensure it is a valid Individual Address
+ **/
+s32 ixgbe_validate_mac_addr(u8 *mac_addr)
+{
+ s32 status = 0;
+
+ /* Make sure it is not a multicast address */
+ if (IXGBE_IS_MULTICAST(mac_addr)) {
+ hw_dbg(hw, "MAC address is multicast\n");
+ status = IXGBE_ERR_INVALID_MAC_ADDR;
+ /* Not a broadcast address */
+ } else if (IXGBE_IS_BROADCAST(mac_addr)) {
+ hw_dbg(hw, "MAC address is broadcast\n");
+ status = IXGBE_ERR_INVALID_MAC_ADDR;
+ /* Reject the zero address */
+ } else if (mac_addr[0] == 0 && mac_addr[1] == 0 && mac_addr[2] == 0 &&
+ mac_addr[3] == 0 && mac_addr[4] == 0 && mac_addr[5] == 0) {
+ hw_dbg(hw, "MAC address is all zeros\n");
+ status = IXGBE_ERR_INVALID_MAC_ADDR;
+ }
+ return status;
+}
+
+/**
+ * ixgbe_set_rar_generic - Set Rx address register
+ * @hw: pointer to hardware structure
+ * @index: Receive address register to write
+ * @addr: Address to put into receive address register
+ * @vmdq: VMDq "set" or "pool" index
+ * @enable_addr: set flag that address is active
+ *
+ * Puts an ethernet address into a receive address register.
+ **/
+s32 ixgbe_set_rar_generic(struct ixgbe_hw *hw, u32 index, u8 *addr, u32 vmdq,
+ u32 enable_addr)
+{
+ u32 rar_low, rar_high;
+ u32 rar_entries = hw->mac.num_rar_entries;
+
+ /* Make sure we are using a valid rar index range */
+ if (index >= rar_entries) {
+ hw_dbg(hw, "RAR index %d is out of range.\n", index);
+ return IXGBE_ERR_INVALID_ARGUMENT;
+ }
+
+ /* setup VMDq pool selection before this RAR gets enabled */
+ hw->mac.ops.set_vmdq(hw, index, vmdq);
+
+ /*
+ * HW expects these in little endian so we reverse the byte
+ * order from network order (big endian) to little endian
+ */
+ rar_low = ((u32)addr[0] |
+ ((u32)addr[1] << 8) |
+ ((u32)addr[2] << 16) |
+ ((u32)addr[3] << 24));
+ /*
+ * Some parts put the VMDq setting in the extra RAH bits,
+ * so save everything except the lower 16 bits that hold part
+ * of the address and the address valid bit.
+ */
+ rar_high = IXGBE_READ_REG(hw, IXGBE_RAH(index));
+ rar_high &= ~(0x0000FFFF | IXGBE_RAH_AV);
+ rar_high |= ((u32)addr[4] | ((u32)addr[5] << 8));
+
+ if (enable_addr != 0)
+ rar_high |= IXGBE_RAH_AV;
+
+ IXGBE_WRITE_REG(hw, IXGBE_RAL(index), rar_low);
+ IXGBE_WRITE_REG(hw, IXGBE_RAH(index), rar_high);
+
+ return 0;
+}
+
+/**
+ * ixgbe_clear_rar_generic - Remove Rx address register
+ * @hw: pointer to hardware structure
+ * @index: Receive address register to write
+ *
+ * Clears an ethernet address from a receive address register.
+ **/
+s32 ixgbe_clear_rar_generic(struct ixgbe_hw *hw, u32 index)
+{
+ u32 rar_high;
+ u32 rar_entries = hw->mac.num_rar_entries;
+
+ /* Make sure we are using a valid rar index range */
+ if (index >= rar_entries) {
+ hw_dbg(hw, "RAR index %d is out of range.\n", index);
+ return IXGBE_ERR_INVALID_ARGUMENT;
+ }
+
+ /*
+ * Some parts put the VMDq setting in the extra RAH bits,
+ * so save everything except the lower 16 bits that hold part
+ * of the address and the address valid bit.
+ */
+ rar_high = IXGBE_READ_REG(hw, IXGBE_RAH(index));
+ rar_high &= ~(0x0000FFFF | IXGBE_RAH_AV);
+
+ IXGBE_WRITE_REG(hw, IXGBE_RAL(index), 0);
+ IXGBE_WRITE_REG(hw, IXGBE_RAH(index), rar_high);
+
+ /* clear VMDq pool/queue selection for this RAR */
+ hw->mac.ops.clear_vmdq(hw, index, IXGBE_CLEAR_VMDQ_ALL);
+
+ return 0;
+}
+
+/**
+ * ixgbe_init_rx_addrs_generic - Initializes receive address filters.
+ * @hw: pointer to hardware structure
+ *
+ * Places the MAC address in receive address register 0 and clears the rest
+ * of the receive address registers. Clears the multicast table. Assumes
+ * the receiver is in reset when the routine is called.
+ **/
+s32 ixgbe_init_rx_addrs_generic(struct ixgbe_hw *hw)
+{
+ u32 i;
+ u32 rar_entries = hw->mac.num_rar_entries;
+
+ /*
+ * If the current mac address is valid, assume it is a software override
+ * to the permanent address.
+ * Otherwise, use the permanent address from the eeprom.
+ */
+ if (ixgbe_validate_mac_addr(hw->mac.addr) ==
+ IXGBE_ERR_INVALID_MAC_ADDR) {
+ /* Get the MAC address from the RAR0 for later reference */
+ hw->mac.ops.get_mac_addr(hw, hw->mac.addr);
+
+ hw_dbg(hw, " Keeping Current RAR0 Addr =%.2X %.2X %.2X ",
+ hw->mac.addr[0], hw->mac.addr[1],
+ hw->mac.addr[2]);
+ hw_dbg(hw, "%.2X %.2X %.2X\n", hw->mac.addr[3],
+ hw->mac.addr[4], hw->mac.addr[5]);
+ } else {
+ /* Setup the receive address. */
+ hw_dbg(hw, "Overriding MAC Address in RAR[0]\n");
+ hw_dbg(hw, " New MAC Addr =%.2X %.2X %.2X ",
+ hw->mac.addr[0], hw->mac.addr[1],
+ hw->mac.addr[2]);
+ hw_dbg(hw, "%.2X %.2X %.2X\n", hw->mac.addr[3],
+ hw->mac.addr[4], hw->mac.addr[5]);
+
+ hw->mac.ops.set_rar(hw, 0, hw->mac.addr, 0, IXGBE_RAH_AV);
+
+ /* clear VMDq pool/queue selection for RAR 0 */
+ hw->mac.ops.clear_vmdq(hw, 0, IXGBE_CLEAR_VMDQ_ALL);
+ }
+ hw->addr_ctrl.overflow_promisc = 0;
+
+ hw->addr_ctrl.rar_used_count = 1;
+
+ /* Zero out the other receive addresses. */
+ hw_dbg(hw, "Clearing RAR[1-%d]\n", rar_entries - 1);
+ for (i = 1; i < rar_entries; i++) {
+ IXGBE_WRITE_REG(hw, IXGBE_RAL(i), 0);
+ IXGBE_WRITE_REG(hw, IXGBE_RAH(i), 0);
+ }
+
+ /* Clear the MTA */
+ hw->addr_ctrl.mta_in_use = 0;
+ IXGBE_WRITE_REG(hw, IXGBE_MCSTCTRL, hw->mac.mc_filter_type);
+
+ hw_dbg(hw, " Clearing MTA\n");
+ for (i = 0; i < hw->mac.mcft_size; i++)
+ IXGBE_WRITE_REG(hw, IXGBE_MTA(i), 0);
+
+ ixgbe_init_uta_tables(hw);
+
+ return 0;
+}
+
+/**
+ * ixgbe_add_uc_addr - Adds a secondary unicast address.
+ * @hw: pointer to hardware structure
+ * @addr: new address
+ *
+ * Adds it to unused receive address register or goes into promiscuous mode.
+ **/
+void ixgbe_add_uc_addr(struct ixgbe_hw *hw, u8 *addr, u32 vmdq)
+{
+ u32 rar_entries = hw->mac.num_rar_entries;
+ u32 rar;
+
+ hw_dbg(hw, " UC Addr = %.2X %.2X %.2X %.2X %.2X %.2X\n",
+ addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
+
+ /*
+ * Place this address in the RAR if there is room,
+ * else put the controller into promiscuous mode
+ */
+ if (hw->addr_ctrl.rar_used_count < rar_entries) {
+ rar = hw->addr_ctrl.rar_used_count;
+ hw->mac.ops.set_rar(hw, rar, addr, vmdq, IXGBE_RAH_AV);
+ hw_dbg(hw, "Added a secondary address to RAR[%d]\n", rar);
+ hw->addr_ctrl.rar_used_count++;
+ } else {
+ hw->addr_ctrl.overflow_promisc++;
+ }
+
+ hw_dbg(hw, "ixgbe_add_uc_addr Complete\n");
+}
+
+/**
+ * ixgbe_update_uc_addr_list_generic - Updates MAC list of secondary addresses
+ * @hw: pointer to hardware structure
+ * @addr_list: the list of new addresses
+ * @addr_count: number of addresses
+ * @next: iterator function to walk the address list
+ *
+ * The given list replaces any existing list. Clears the secondary addrs from
+ * receive address registers. Uses unused receive address registers for the
+ * first secondary addresses, and falls back to promiscuous mode as needed.
+ *
+ * Drivers using secondary unicast addresses must set user_set_promisc when
+ * manually putting the device into promiscuous mode.
+ **/
+s32 ixgbe_update_uc_addr_list_generic(struct ixgbe_hw *hw, u8 *addr_list,
+ u32 addr_count, ixgbe_mc_addr_itr next)
+{
+ u8 *addr;
+ u32 i;
+ u32 old_promisc_setting = hw->addr_ctrl.overflow_promisc;
+ u32 uc_addr_in_use;
+ u32 fctrl;
+ u32 vmdq;
+
+ /*
+ * Clear accounting of old secondary address list,
+ * don't count RAR[0]
+ */
+ uc_addr_in_use = hw->addr_ctrl.rar_used_count - 1;
+ hw->addr_ctrl.rar_used_count -= uc_addr_in_use;
+ hw->addr_ctrl.overflow_promisc = 0;
+
+ /* Zero out the other receive addresses */
+ hw_dbg(hw, "Clearing RAR[1-%d]\n", uc_addr_in_use+1);
+ for (i = 0; i < uc_addr_in_use; i++) {
+ IXGBE_WRITE_REG(hw, IXGBE_RAL(1+i), 0);
+ IXGBE_WRITE_REG(hw, IXGBE_RAH(1+i), 0);
+ }
+
+ /* Add the new addresses */
+ for (i = 0; i < addr_count; i++) {
+ hw_dbg(hw, " Adding the secondary addresses:\n");
+ addr = next(hw, &addr_list, &vmdq);
+ ixgbe_add_uc_addr(hw, addr, vmdq);
+ }
+
+ if (hw->addr_ctrl.overflow_promisc) {
+ /* enable promisc if not already in overflow or set by user */
+ if (!old_promisc_setting && !hw->addr_ctrl.user_set_promisc) {
+ hw_dbg(hw, " Entering address overflow promisc mode\n");
+ fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
+ fctrl |= IXGBE_FCTRL_UPE;
+ IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
+ }
+ } else {
+ /* only disable if set by overflow, not by user */
+ if (old_promisc_setting && !hw->addr_ctrl.user_set_promisc) {
+ hw_dbg(hw, " Leaving address overflow promisc mode\n");
+ fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
+ fctrl &= ~IXGBE_FCTRL_UPE;
+ IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
+ }
+ }
+
+ hw_dbg(hw, "ixgbe_update_uc_addr_list_generic Complete\n");
+ return 0;
+}
+
+/**
+ * ixgbe_mta_vector - Determines bit-vector in multicast table to set
+ * @hw: pointer to hardware structure
+ * @mc_addr: the multicast address
+ *
+ * Extracts the 12 bits, from a multicast address, to determine which
+ * bit-vector to set in the multicast table. The hardware uses 12 bits, from
+ * incoming rx multicast addresses, to determine the bit-vector to check in
+ * the MTA. Which of the 4 combination, of 12-bits, the hardware uses is set
+ * by the MO field of the MCSTCTRL. The MO field is set during initialization
+ * to mc_filter_type.
+ **/
+static s32 ixgbe_mta_vector(struct ixgbe_hw *hw, u8 *mc_addr)
+{
+ u32 vector = 0;
+
+ switch (hw->mac.mc_filter_type) {
+ case 0: /* use bits [47:36] of the address */
+ vector = ((mc_addr[4] >> 4) | (((u16)mc_addr[5]) << 4));
+ break;
+ case 1: /* use bits [46:35] of the address */
+ vector = ((mc_addr[4] >> 3) | (((u16)mc_addr[5]) << 5));
+ break;
+ case 2: /* use bits [45:34] of the address */
+ vector = ((mc_addr[4] >> 2) | (((u16)mc_addr[5]) << 6));
+ break;
+ case 3: /* use bits [43:32] of the address */
+ vector = ((mc_addr[4]) | (((u16)mc_addr[5]) << 8));
+ break;
+ default: /* Invalid mc_filter_type */
+ hw_dbg(hw, "MC filter type param set incorrectly\n");
+ break;
+ }
+
+ /* vector can only be 12-bits or boundary will be exceeded */
+ vector &= 0xFFF;
+ return vector;
+}
+
+/**
+ * ixgbe_set_mta - Set bit-vector in multicast table
+ * @hw: pointer to hardware structure
+ * @hash_value: Multicast address hash value
+ *
+ * Sets the bit-vector in the multicast table.
+ **/
+void ixgbe_set_mta(struct ixgbe_hw *hw, u8 *mc_addr)
+{
+ u32 vector;
+ u32 vector_bit;
+ u32 vector_reg;
+
+ hw->addr_ctrl.mta_in_use++;
+
+ vector = ixgbe_mta_vector(hw, mc_addr);
+ hw_dbg(hw, " bit-vector = 0x%03X\n", vector);
+
+ /*
+ * The MTA is a register array of 128 32-bit registers. It is treated
+ * like an array of 4096 bits. We want to set bit
+ * BitArray[vector_value]. So we figure out what register the bit is
+ * in, read it, OR in the new bit, then write back the new value. The
+ * register is determined by the upper 7 bits of the vector value and
+ * the bit within that register are determined by the lower 5 bits of
+ * the value.
+ */
+ vector_reg = (vector >> 5) & 0x7F;
+ vector_bit = vector & 0x1F;
+ hw->mac.mta_shadow[vector_reg] |= (1 << vector_bit);
+}
+
+/**
+ * ixgbe_update_mc_addr_list_generic - Updates MAC list of multicast addresses
+ * @hw: pointer to hardware structure
+ * @mc_addr_list: the list of new multicast addresses
+ * @mc_addr_count: number of addresses
+ * @next: iterator function to walk the multicast address list
+ * @clear: flag, when set clears the table beforehand
+ *
+ * When the clear flag is set, the given list replaces any existing list.
+ * Hashes the given addresses into the multicast table.
+ **/
+s32 ixgbe_update_mc_addr_list_generic(struct ixgbe_hw *hw, u8 *mc_addr_list,
+ u32 mc_addr_count, ixgbe_mc_addr_itr next,
+ bool clear)
+{
+ u32 i;
+ u32 vmdq;
+
+ /*
+ * Set the new number of MC addresses that we are being requested to
+ * use.
+ */
+ hw->addr_ctrl.num_mc_addrs = mc_addr_count;
+ hw->addr_ctrl.mta_in_use = 0;
+
+ /* Clear mta_shadow */
+ if (clear) {
+ hw_dbg(hw, " Clearing MTA\n");
+ memset(&hw->mac.mta_shadow, 0, sizeof(hw->mac.mta_shadow));
+ }
+
+ /* Update mta_shadow */
+ for (i = 0; i < mc_addr_count; i++) {
+ hw_dbg(hw, " Adding the multicast addresses:\n");
+ ixgbe_set_mta(hw, next(hw, &mc_addr_list, &vmdq));
+ }
+
+ /* Enable mta */
+ for (i = 0; i < hw->mac.mcft_size; i++)
+ IXGBE_WRITE_REG_ARRAY(hw, IXGBE_MTA(0), i,
+ hw->mac.mta_shadow[i]);
+
+ if (hw->addr_ctrl.mta_in_use > 0)
+ IXGBE_WRITE_REG(hw, IXGBE_MCSTCTRL,
+ IXGBE_MCSTCTRL_MFE | hw->mac.mc_filter_type);
+
+ hw_dbg(hw, "ixgbe_update_mc_addr_list_generic Complete\n");
+ return 0;
+}
+
+/**
+ * ixgbe_enable_mc_generic - Enable multicast address in RAR
+ * @hw: pointer to hardware structure
+ *
+ * Enables multicast address in RAR and the use of the multicast hash table.
+ **/
+s32 ixgbe_enable_mc_generic(struct ixgbe_hw *hw)
+{
+ struct ixgbe_addr_filter_info *a = &hw->addr_ctrl;
+
+ if (a->mta_in_use > 0)
+ IXGBE_WRITE_REG(hw, IXGBE_MCSTCTRL, IXGBE_MCSTCTRL_MFE |
+ hw->mac.mc_filter_type);
+
+ return 0;
+}
+
+/**
+ * ixgbe_disable_mc_generic - Disable multicast address in RAR
+ * @hw: pointer to hardware structure
+ *
+ * Disables multicast address in RAR and the use of the multicast hash table.
+ **/
+s32 ixgbe_disable_mc_generic(struct ixgbe_hw *hw)
+{
+ struct ixgbe_addr_filter_info *a = &hw->addr_ctrl;
+
+ if (a->mta_in_use > 0)
+ IXGBE_WRITE_REG(hw, IXGBE_MCSTCTRL, hw->mac.mc_filter_type);
+
+ return 0;
+}
+
+/**
+ * ixgbe_fc_enable_generic - Enable flow control
+ * @hw: pointer to hardware structure
+ *
+ * Enable flow control according to the current settings.
+ **/
+s32 ixgbe_fc_enable_generic(struct ixgbe_hw *hw)
+{
+ s32 ret_val = 0;
+ u32 mflcn_reg, fccfg_reg;
+ u32 reg;
+ u32 fcrtl, fcrth;
+ int i;
+
+ /* Validate the water mark configuration */
+ if (!hw->fc.pause_time) {
+ ret_val = IXGBE_ERR_INVALID_LINK_SETTINGS;
+ goto out;
+ }
+
+ /* Low water mark of zero causes XOFF floods */
+ for (i = 0; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
+ if ((hw->fc.current_mode & ixgbe_fc_tx_pause) &&
+ hw->fc.high_water[i]) {
+ if (!hw->fc.low_water[i] ||
+ hw->fc.low_water[i] >= hw->fc.high_water[i]) {
+ hw_dbg(hw, "Invalid water mark configuration\n");
+ ret_val = IXGBE_ERR_INVALID_LINK_SETTINGS;
+ goto out;
+ }
+ }
+ }
+
+ /* Negotiate the fc mode to use */
+ ixgbe_fc_autoneg(hw);
+
+ /* Disable any previous flow control settings */
+ mflcn_reg = IXGBE_READ_REG(hw, IXGBE_MFLCN);
+ mflcn_reg &= ~(IXGBE_MFLCN_RPFCE_MASK | IXGBE_MFLCN_RFCE);
+
+ fccfg_reg = IXGBE_READ_REG(hw, IXGBE_FCCFG);
+ fccfg_reg &= ~(IXGBE_FCCFG_TFCE_802_3X | IXGBE_FCCFG_TFCE_PRIORITY);
+
+ /*
+ * The possible values of fc.current_mode are:
+ * 0: Flow control is completely disabled
+ * 1: Rx flow control is enabled (we can receive pause frames,
+ * but not send pause frames).
+ * 2: Tx flow control is enabled (we can send pause frames but
+ * we do not support receiving pause frames).
+ * 3: Both Rx and Tx flow control (symmetric) are enabled.
+ * other: Invalid.
+ */
+ switch (hw->fc.current_mode) {
+ case ixgbe_fc_none:
+ /*
+ * Flow control is disabled by software override or autoneg.
+ * The code below will actually disable it in the HW.
+ */
+ break;
+ case ixgbe_fc_rx_pause:
+ /*
+ * Rx Flow control is enabled and Tx Flow control is
+ * disabled by software override. Since there really
+ * isn't a way to advertise that we are capable of RX
+ * Pause ONLY, we will advertise that we support both
+ * symmetric and asymmetric Rx PAUSE. Later, we will
+ * disable the adapter's ability to send PAUSE frames.
+ */
+ mflcn_reg |= IXGBE_MFLCN_RFCE;
+ break;
+ case ixgbe_fc_tx_pause:
+ /*
+ * Tx Flow control is enabled, and Rx Flow control is
+ * disabled by software override.
+ */
+ fccfg_reg |= IXGBE_FCCFG_TFCE_802_3X;
+ break;
+ case ixgbe_fc_full:
+ /* Flow control (both Rx and Tx) is enabled by SW override. */
+ mflcn_reg |= IXGBE_MFLCN_RFCE;
+ fccfg_reg |= IXGBE_FCCFG_TFCE_802_3X;
+ break;
+ default:
+ hw_dbg(hw, "Flow control param set incorrectly\n");
+ ret_val = IXGBE_ERR_CONFIG;
+ goto out;
+ break;
+ }
+
+ /* Set 802.3x based flow control settings. */
+ mflcn_reg |= IXGBE_MFLCN_DPF;
+ IXGBE_WRITE_REG(hw, IXGBE_MFLCN, mflcn_reg);
+ IXGBE_WRITE_REG(hw, IXGBE_FCCFG, fccfg_reg);
+
+
+ /* Set up and enable Rx high/low water mark thresholds, enable XON. */
+ for (i = 0; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
+ if ((hw->fc.current_mode & ixgbe_fc_tx_pause) &&
+ hw->fc.high_water[i]) {
+ fcrtl = (hw->fc.low_water[i] << 10) | IXGBE_FCRTL_XONE;
+ IXGBE_WRITE_REG(hw, IXGBE_FCRTL_82599(i), fcrtl);
+ fcrth = (hw->fc.high_water[i] << 10) | IXGBE_FCRTH_FCEN;
+ } else {
+ IXGBE_WRITE_REG(hw, IXGBE_FCRTL_82599(i), 0);
+ /*
+ * In order to prevent Tx hangs when the internal Tx
+ * switch is enabled we must set the high water mark
+ * to the maximum FCRTH value. This allows the Tx
+ * switch to function even under heavy Rx workloads.
+ */
+ fcrth = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i)) - 32;
+ }
+
+ IXGBE_WRITE_REG(hw, IXGBE_FCRTH_82599(i), fcrth);
+ }
+
+ /* Configure pause time (2 TCs per register) */
+ reg = hw->fc.pause_time * 0x00010001;
+ for (i = 0; i < (IXGBE_DCB_MAX_TRAFFIC_CLASS / 2); i++)
+ IXGBE_WRITE_REG(hw, IXGBE_FCTTV(i), reg);
+
+ /* Configure flow control refresh threshold value */
+ IXGBE_WRITE_REG(hw, IXGBE_FCRTV, hw->fc.pause_time / 2);
+
+out:
+ return ret_val;
+}
+
+/**
+ * ixgbe_negotiate_fc - Negotiate flow control
+ * @hw: pointer to hardware structure
+ * @adv_reg: flow control advertised settings
+ * @lp_reg: link partner's flow control settings
+ * @adv_sym: symmetric pause bit in advertisement
+ * @adv_asm: asymmetric pause bit in advertisement
+ * @lp_sym: symmetric pause bit in link partner advertisement
+ * @lp_asm: asymmetric pause bit in link partner advertisement
+ *
+ * Find the intersection between advertised settings and link partner's
+ * advertised settings
+ **/
+static s32 ixgbe_negotiate_fc(struct ixgbe_hw *hw, u32 adv_reg, u32 lp_reg,
+ u32 adv_sym, u32 adv_asm, u32 lp_sym, u32 lp_asm)
+{
+ if ((!(adv_reg)) || (!(lp_reg)))
+ return IXGBE_ERR_FC_NOT_NEGOTIATED;
+
+ if ((adv_reg & adv_sym) && (lp_reg & lp_sym)) {
+ /*
+ * Now we need to check if the user selected Rx ONLY
+ * of pause frames. In this case, we had to advertise
+ * FULL flow control because we could not advertise RX
+ * ONLY. Hence, we must now check to see if we need to
+ * turn OFF the TRANSMISSION of PAUSE frames.
+ */
+ if (hw->fc.requested_mode == ixgbe_fc_full) {
+ hw->fc.current_mode = ixgbe_fc_full;
+ hw_dbg(hw, "Flow Control = FULL.\n");
+ } else {
+ hw->fc.current_mode = ixgbe_fc_rx_pause;
+ hw_dbg(hw, "Flow Control=RX PAUSE frames only\n");
+ }
+ } else if (!(adv_reg & adv_sym) && (adv_reg & adv_asm) &&
+ (lp_reg & lp_sym) && (lp_reg & lp_asm)) {
+ hw->fc.current_mode = ixgbe_fc_tx_pause;
+ hw_dbg(hw, "Flow Control = TX PAUSE frames only.\n");
+ } else if ((adv_reg & adv_sym) && (adv_reg & adv_asm) &&
+ !(lp_reg & lp_sym) && (lp_reg & lp_asm)) {
+ hw->fc.current_mode = ixgbe_fc_rx_pause;
+ hw_dbg(hw, "Flow Control = RX PAUSE frames only.\n");
+ } else {
+ hw->fc.current_mode = ixgbe_fc_none;
+ hw_dbg(hw, "Flow Control = NONE.\n");
+ }
+ return 0;
+}
+
+/**
+ * ixgbe_fc_autoneg_fiber - Enable flow control on 1 gig fiber
+ * @hw: pointer to hardware structure
+ *
+ * Enable flow control according on 1 gig fiber.
+ **/
+static s32 ixgbe_fc_autoneg_fiber(struct ixgbe_hw *hw)
+{
+ u32 pcs_anadv_reg, pcs_lpab_reg, linkstat;
+ s32 ret_val = IXGBE_ERR_FC_NOT_NEGOTIATED;
+
+ /*
+ * On multispeed fiber at 1g, bail out if
+ * - link is up but AN did not complete, or if
+ * - link is up and AN completed but timed out
+ */
+
+ linkstat = IXGBE_READ_REG(hw, IXGBE_PCS1GLSTA);
+ if ((!!(linkstat & IXGBE_PCS1GLSTA_AN_COMPLETE) == 0) ||
+ (!!(linkstat & IXGBE_PCS1GLSTA_AN_TIMED_OUT) == 1))
+ goto out;
+
+ pcs_anadv_reg = IXGBE_READ_REG(hw, IXGBE_PCS1GANA);
+ pcs_lpab_reg = IXGBE_READ_REG(hw, IXGBE_PCS1GANLP);
+
+ ret_val = ixgbe_negotiate_fc(hw, pcs_anadv_reg,
+ pcs_lpab_reg, IXGBE_PCS1GANA_SYM_PAUSE,
+ IXGBE_PCS1GANA_ASM_PAUSE,
+ IXGBE_PCS1GANA_SYM_PAUSE,
+ IXGBE_PCS1GANA_ASM_PAUSE);
+
+out:
+ return ret_val;
+}
+
+/**
+ * ixgbe_fc_autoneg_backplane - Enable flow control IEEE clause 37
+ * @hw: pointer to hardware structure
+ *
+ * Enable flow control according to IEEE clause 37.
+ **/
+static s32 ixgbe_fc_autoneg_backplane(struct ixgbe_hw *hw)
+{
+ u32 links2, anlp1_reg, autoc_reg, links;
+ s32 ret_val = IXGBE_ERR_FC_NOT_NEGOTIATED;
+
+ /*
+ * On backplane, bail out if
+ * - backplane autoneg was not completed, or if
+ * - we are 82599 and link partner is not AN enabled
+ */
+ links = IXGBE_READ_REG(hw, IXGBE_LINKS);
+ if ((links & IXGBE_LINKS_KX_AN_COMP) == 0)
+ goto out;
+
+ if (hw->mac.type == ixgbe_mac_82599EB) {
+ links2 = IXGBE_READ_REG(hw, IXGBE_LINKS2);
+ if ((links2 & IXGBE_LINKS2_AN_SUPPORTED) == 0)
+ goto out;
+ }
+ /*
+ * Read the 10g AN autoc and LP ability registers and resolve
+ * local flow control settings accordingly
+ */
+ autoc_reg = IXGBE_READ_REG(hw, IXGBE_AUTOC);
+ anlp1_reg = IXGBE_READ_REG(hw, IXGBE_ANLP1);
+
+ ret_val = ixgbe_negotiate_fc(hw, autoc_reg,
+ anlp1_reg, IXGBE_AUTOC_SYM_PAUSE, IXGBE_AUTOC_ASM_PAUSE,
+ IXGBE_ANLP1_SYM_PAUSE, IXGBE_ANLP1_ASM_PAUSE);
+
+out:
+ return ret_val;
+}
+
+/**
+ * ixgbe_fc_autoneg_copper - Enable flow control IEEE clause 37
+ * @hw: pointer to hardware structure
+ *
+ * Enable flow control according to IEEE clause 37.
+ **/
+static s32 ixgbe_fc_autoneg_copper(struct ixgbe_hw *hw)
+{
+ u16 technology_ability_reg = 0;
+ u16 lp_technology_ability_reg = 0;
+
+ hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_ADVT,
+ IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+ &technology_ability_reg);
+ hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_LP,
+ IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+ &lp_technology_ability_reg);
+
+ return ixgbe_negotiate_fc(hw, (u32)technology_ability_reg,
+ (u32)lp_technology_ability_reg,
+ IXGBE_TAF_SYM_PAUSE, IXGBE_TAF_ASM_PAUSE,
+ IXGBE_TAF_SYM_PAUSE, IXGBE_TAF_ASM_PAUSE);
+}
+
+/**
+ * ixgbe_fc_autoneg - Configure flow control
+ * @hw: pointer to hardware structure
+ *
+ * Compares our advertised flow control capabilities to those advertised by
+ * our link partner, and determines the proper flow control mode to use.
+ **/
+void ixgbe_fc_autoneg(struct ixgbe_hw *hw)
+{
+ s32 ret_val = IXGBE_ERR_FC_NOT_NEGOTIATED;
+ ixgbe_link_speed speed;
+ bool link_up;
+
+ /*
+ * AN should have completed when the cable was plugged in.
+ * Look for reasons to bail out. Bail out if:
+ * - FC autoneg is disabled, or if
+ * - link is not up.
+ */
+ if (hw->fc.disable_fc_autoneg)
+ goto out;
+
+ hw->mac.ops.check_link(hw, &speed, &link_up, false);
+ if (!link_up)
+ goto out;
+
+ switch (hw->phy.media_type) {
+ /* Autoneg flow control on fiber adapters */
+ case ixgbe_media_type_fiber:
+ if (speed == IXGBE_LINK_SPEED_1GB_FULL)
+ ret_val = ixgbe_fc_autoneg_fiber(hw);
+ break;
+
+ /* Autoneg flow control on backplane adapters */
+ case ixgbe_media_type_backplane:
+ ret_val = ixgbe_fc_autoneg_backplane(hw);
+ break;
+
+ /* Autoneg flow control on copper adapters */
+ case ixgbe_media_type_copper:
+ if (ixgbe_device_supports_autoneg_fc(hw) == 0)
+ ret_val = ixgbe_fc_autoneg_copper(hw);
+ break;
+
+ default:
+ break;
+ }
+
+out:
+ if (ret_val == 0) {
+ hw->fc.fc_was_autonegged = true;
+ } else {
+ hw->fc.fc_was_autonegged = false;
+ hw->fc.current_mode = hw->fc.requested_mode;
+ }
+}
+
+/**
+ * ixgbe_disable_pcie_master - Disable PCI-express master access
+ * @hw: pointer to hardware structure
+ *
+ * Disables PCI-Express master access and verifies there are no pending
+ * requests. IXGBE_ERR_MASTER_REQUESTS_PENDING is returned if master disable
+ * bit hasn't caused the master requests to be disabled, else 0
+ * is returned signifying master requests disabled.
+ **/
+s32 ixgbe_disable_pcie_master(struct ixgbe_hw *hw)
+{
+ s32 status = 0;
+ u32 i;
+
+ /* Always set this bit to ensure any future transactions are blocked */
+ IXGBE_WRITE_REG(hw, IXGBE_CTRL, IXGBE_CTRL_GIO_DIS);
+
+ /* Exit if master requets are blocked */
+ if (!(IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_GIO))
+ goto out;
+
+ /* Poll for master request bit to clear */
+ for (i = 0; i < IXGBE_PCI_MASTER_DISABLE_TIMEOUT; i++) {
+ udelay(100);
+ if (!(IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_GIO))
+ goto out;
+ }
+
+ /*
+ * Two consecutive resets are required via CTRL.RST per datasheet
+ * 5.2.5.3.2 Master Disable. We set a flag to inform the reset routine
+ * of this need. The first reset prevents new master requests from
+ * being issued by our device. We then must wait 1usec or more for any
+ * remaining completions from the PCIe bus to trickle in, and then reset
+ * again to clear out any effects they may have had on our device.
+ */
+ hw_dbg(hw, "GIO Master Disable bit didn't clear - requesting resets\n");
+ hw->mac.flags |= IXGBE_FLAGS_DOUBLE_RESET_REQUIRED;
+
+ /*
+ * Before proceeding, make sure that the PCIe block does not have
+ * transactions pending.
+ */
+ for (i = 0; i < IXGBE_PCI_MASTER_DISABLE_TIMEOUT; i++) {
+ udelay(100);
+ if (!(IXGBE_READ_PCIE_WORD(hw, IXGBE_PCI_DEVICE_STATUS) &
+ IXGBE_PCI_DEVICE_STATUS_TRANSACTION_PENDING))
+ goto out;
+ }
+
+ hw_dbg(hw, "PCIe transaction pending bit also did not clear.\n");
+ status = IXGBE_ERR_MASTER_REQUESTS_PENDING;
+
+out:
+ return status;
+}
+
+/**
+ * ixgbe_acquire_swfw_sync - Acquire SWFW semaphore
+ * @hw: pointer to hardware structure
+ * @mask: Mask to specify which semaphore to acquire
+ *
+ * Acquires the SWFW semaphore through the GSSR register for the specified
+ * function (CSR, PHY0, PHY1, EEPROM, Flash)
+ **/
+s32 ixgbe_acquire_swfw_sync(struct ixgbe_hw *hw, u16 mask)
+{
+ u32 gssr;
+ u32 swmask = mask;
+ u32 fwmask = mask << 5;
+ s32 timeout = 200;
+
+ while (timeout) {
+ /*
+ * SW EEPROM semaphore bit is used for access to all
+ * SW_FW_SYNC/GSSR bits (not just EEPROM)
+ */
+ if (ixgbe_get_eeprom_semaphore(hw))
+ return IXGBE_ERR_SWFW_SYNC;
+
+ gssr = IXGBE_READ_REG(hw, IXGBE_GSSR);
+ if (!(gssr & (fwmask | swmask)))
+ break;
+
+ /*
+ * Firmware currently using resource (fwmask) or other software
+ * thread currently using resource (swmask)
+ */
+ ixgbe_release_eeprom_semaphore(hw);
+ msleep(5);
+ timeout--;
+ }
+
+ if (!timeout) {
+ hw_dbg(hw, "Driver can't access resource, SW_FW_SYNC timeout.\n");
+ return IXGBE_ERR_SWFW_SYNC;
+ }
+
+ gssr |= swmask;
+ IXGBE_WRITE_REG(hw, IXGBE_GSSR, gssr);
+
+ ixgbe_release_eeprom_semaphore(hw);
+ return 0;
+}
+
+/**
+ * ixgbe_release_swfw_sync - Release SWFW semaphore
+ * @hw: pointer to hardware structure
+ * @mask: Mask to specify which semaphore to release
+ *
+ * Releases the SWFW semaphore through the GSSR register for the specified
+ * function (CSR, PHY0, PHY1, EEPROM, Flash)
+ **/
+void ixgbe_release_swfw_sync(struct ixgbe_hw *hw, u16 mask)
+{
+ u32 gssr;
+ u32 swmask = mask;
+
+ ixgbe_get_eeprom_semaphore(hw);
+
+ gssr = IXGBE_READ_REG(hw, IXGBE_GSSR);
+ gssr &= ~swmask;
+ IXGBE_WRITE_REG(hw, IXGBE_GSSR, gssr);
+
+ ixgbe_release_eeprom_semaphore(hw);
+}
+
+/**
+ * ixgbe_disable_sec_rx_path_generic - Stops the receive data path
+ * @hw: pointer to hardware structure
+ *
+ * Stops the receive data path and waits for the HW to internally empty
+ * the Rx security block
+ **/
+s32 ixgbe_disable_sec_rx_path_generic(struct ixgbe_hw *hw)
+{
+#define IXGBE_MAX_SECRX_POLL 40
+
+ int i;
+ int secrxreg;
+
+ secrxreg = IXGBE_READ_REG(hw, IXGBE_SECRXCTRL);
+ secrxreg |= IXGBE_SECRXCTRL_RX_DIS;
+ IXGBE_WRITE_REG(hw, IXGBE_SECRXCTRL, secrxreg);
+ for (i = 0; i < IXGBE_MAX_SECRX_POLL; i++) {
+ secrxreg = IXGBE_READ_REG(hw, IXGBE_SECRXSTAT);
+ if (secrxreg & IXGBE_SECRXSTAT_SECRX_RDY)
+ break;
+ else
+ /* Use interrupt-safe sleep just in case */
+ udelay(1000);
+ }
+
+ /* For informational purposes only */
+ if (i >= IXGBE_MAX_SECRX_POLL)
+ hw_dbg(hw, "Rx unit being enabled before security "
+ "path fully disabled. Continuing with init.\n");
+
+ return 0;
+}
+
+/**
+ * ixgbe_enable_sec_rx_path_generic - Enables the receive data path
+ * @hw: pointer to hardware structure
+ *
+ * Enables the receive data path.
+ **/
+s32 ixgbe_enable_sec_rx_path_generic(struct ixgbe_hw *hw)
+{
+ int secrxreg;
+
+ secrxreg = IXGBE_READ_REG(hw, IXGBE_SECRXCTRL);
+ secrxreg &= ~IXGBE_SECRXCTRL_RX_DIS;
+ IXGBE_WRITE_REG(hw, IXGBE_SECRXCTRL, secrxreg);
+ IXGBE_WRITE_FLUSH(hw);
+
+ return 0;
+}
+
+/**
+ * ixgbe_enable_rx_dma_generic - Enable the Rx DMA unit
+ * @hw: pointer to hardware structure
+ * @regval: register value to write to RXCTRL
+ *
+ * Enables the Rx DMA unit
+ **/
+s32 ixgbe_enable_rx_dma_generic(struct ixgbe_hw *hw, u32 regval)
+{
+ IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, regval);
+
+ return 0;
+}
+
+/**
+ * ixgbe_blink_led_start_generic - Blink LED based on index.
+ * @hw: pointer to hardware structure
+ * @index: led number to blink
+ **/
+s32 ixgbe_blink_led_start_generic(struct ixgbe_hw *hw, u32 index)
+{
+ ixgbe_link_speed speed = 0;
+ bool link_up = 0;
+ u32 autoc_reg = IXGBE_READ_REG(hw, IXGBE_AUTOC);
+ u32 led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL);
+
+ /*
+ * Link must be up to auto-blink the LEDs;
+ * Force it if link is down.
+ */
+ hw->mac.ops.check_link(hw, &speed, &link_up, false);
+
+ if (!link_up) {
+ autoc_reg |= IXGBE_AUTOC_AN_RESTART;
+ autoc_reg |= IXGBE_AUTOC_FLU;
+ IXGBE_WRITE_REG(hw, IXGBE_AUTOC, autoc_reg);
+ IXGBE_WRITE_FLUSH(hw);
+ msleep(10);
+ }
+
+ led_reg &= ~IXGBE_LED_MODE_MASK(index);
+ led_reg |= IXGBE_LED_BLINK(index);
+ IXGBE_WRITE_REG(hw, IXGBE_LEDCTL, led_reg);
+ IXGBE_WRITE_FLUSH(hw);
+
+ return 0;
+}
+
+/**
+ * ixgbe_blink_led_stop_generic - Stop blinking LED based on index.
+ * @hw: pointer to hardware structure
+ * @index: led number to stop blinking
+ **/
+s32 ixgbe_blink_led_stop_generic(struct ixgbe_hw *hw, u32 index)
+{
+ u32 autoc_reg = IXGBE_READ_REG(hw, IXGBE_AUTOC);
+ u32 led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL);
+
+ autoc_reg &= ~IXGBE_AUTOC_FLU;
+ autoc_reg |= IXGBE_AUTOC_AN_RESTART;
+ IXGBE_WRITE_REG(hw, IXGBE_AUTOC, autoc_reg);
+
+ led_reg &= ~IXGBE_LED_MODE_MASK(index);
+ led_reg &= ~IXGBE_LED_BLINK(index);
+ led_reg |= IXGBE_LED_LINK_ACTIVE << IXGBE_LED_MODE_SHIFT(index);
+ IXGBE_WRITE_REG(hw, IXGBE_LEDCTL, led_reg);
+ IXGBE_WRITE_FLUSH(hw);
+
+ return 0;
+}
+
+/**
+ * ixgbe_get_san_mac_addr_offset - Get SAN MAC address offset from the EEPROM
+ * @hw: pointer to hardware structure
+ * @san_mac_offset: SAN MAC address offset
+ *
+ * This function will read the EEPROM location for the SAN MAC address
+ * pointer, and returns the value at that location. This is used in both
+ * get and set mac_addr routines.
+ **/
+static s32 ixgbe_get_san_mac_addr_offset(struct ixgbe_hw *hw,
+ u16 *san_mac_offset)
+{
+ /*
+ * First read the EEPROM pointer to see if the MAC addresses are
+ * available.
+ */
+ hw->eeprom.ops.read(hw, IXGBE_SAN_MAC_ADDR_PTR, san_mac_offset);
+
+ return 0;
+}
+
+/**
+ * ixgbe_get_san_mac_addr_generic - SAN MAC address retrieval from the EEPROM
+ * @hw: pointer to hardware structure
+ * @san_mac_addr: SAN MAC address
+ *
+ * Reads the SAN MAC address from the EEPROM, if it's available. This is
+ * per-port, so set_lan_id() must be called before reading the addresses.
+ * set_lan_id() is called by identify_sfp(), but this cannot be relied
+ * upon for non-SFP connections, so we must call it here.
+ **/
+s32 ixgbe_get_san_mac_addr_generic(struct ixgbe_hw *hw, u8 *san_mac_addr)
+{
+ u16 san_mac_data, san_mac_offset;
+ u8 i;
+
+ /*
+ * First read the EEPROM pointer to see if the MAC addresses are
+ * available. If they're not, no point in calling set_lan_id() here.
+ */
+ ixgbe_get_san_mac_addr_offset(hw, &san_mac_offset);
+
+ if ((san_mac_offset == 0) || (san_mac_offset == 0xFFFF)) {
+ /*
+ * No addresses available in this EEPROM. It's not an
+ * error though, so just wipe the local address and return.
+ */
+ for (i = 0; i < 6; i++)
+ san_mac_addr[i] = 0xFF;
+
+ goto san_mac_addr_out;
+ }
+
+ /* make sure we know which port we need to program */
+ hw->mac.ops.set_lan_id(hw);
+ /* apply the port offset to the address offset */
+ (hw->bus.func) ? (san_mac_offset += IXGBE_SAN_MAC_ADDR_PORT1_OFFSET) :
+ (san_mac_offset += IXGBE_SAN_MAC_ADDR_PORT0_OFFSET);
+ for (i = 0; i < 3; i++) {
+ hw->eeprom.ops.read(hw, san_mac_offset, &san_mac_data);
+ san_mac_addr[i * 2] = (u8)(san_mac_data);
+ san_mac_addr[i * 2 + 1] = (u8)(san_mac_data >> 8);
+ san_mac_offset++;
+ }
+
+san_mac_addr_out:
+ return 0;
+}
+
+/**
+ * ixgbe_set_san_mac_addr_generic - Write the SAN MAC address to the EEPROM
+ * @hw: pointer to hardware structure
+ * @san_mac_addr: SAN MAC address
+ *
+ * Write a SAN MAC address to the EEPROM.
+ **/
+s32 ixgbe_set_san_mac_addr_generic(struct ixgbe_hw *hw, u8 *san_mac_addr)
+{
+ s32 status = 0;
+ u16 san_mac_data, san_mac_offset;
+ u8 i;
+
+ /* Look for SAN mac address pointer. If not defined, return */
+ ixgbe_get_san_mac_addr_offset(hw, &san_mac_offset);
+
+ if ((san_mac_offset == 0) || (san_mac_offset == 0xFFFF)) {
+ status = IXGBE_ERR_NO_SAN_ADDR_PTR;
+ goto san_mac_addr_out;
+ }
+
+ /* Make sure we know which port we need to write */
+ hw->mac.ops.set_lan_id(hw);
+ /* Apply the port offset to the address offset */
+ (hw->bus.func) ? (san_mac_offset += IXGBE_SAN_MAC_ADDR_PORT1_OFFSET) :
+ (san_mac_offset += IXGBE_SAN_MAC_ADDR_PORT0_OFFSET);
+
+ for (i = 0; i < 3; i++) {
+ san_mac_data = (u16)((u16)(san_mac_addr[i * 2 + 1]) << 8);
+ san_mac_data |= (u16)(san_mac_addr[i * 2]);
+ hw->eeprom.ops.write(hw, san_mac_offset, san_mac_data);
+ san_mac_offset++;
+ }
+
+san_mac_addr_out:
+ return status;
+}
+
+/**
+ * ixgbe_get_pcie_msix_count_generic - Gets MSI-X vector count
+ * @hw: pointer to hardware structure
+ *
+ * Read PCIe configuration space, and get the MSI-X vector count from
+ * the capabilities table.
+ **/
+u16 ixgbe_get_pcie_msix_count_generic(struct ixgbe_hw *hw)
+{
+ u16 msix_count = 1;
+ u16 max_msix_count;
+ u16 pcie_offset;
+
+ switch (hw->mac.type) {
+ case ixgbe_mac_82598EB:
+ pcie_offset = IXGBE_PCIE_MSIX_82598_CAPS;
+ max_msix_count = IXGBE_MAX_MSIX_VECTORS_82598;
+ break;
+ case ixgbe_mac_82599EB:
+ case ixgbe_mac_X540:
+ pcie_offset = IXGBE_PCIE_MSIX_82599_CAPS;
+ max_msix_count = IXGBE_MAX_MSIX_VECTORS_82599;
+ break;
+ default:
+ return msix_count;
+ }
+
+ msix_count = IXGBE_READ_PCIE_WORD(hw, pcie_offset);
+ msix_count &= IXGBE_PCIE_MSIX_TBL_SZ_MASK;
+
+ /* MSI-X count is zero-based in HW */
+ msix_count++;
+
+ if (msix_count > max_msix_count)
+ msix_count = max_msix_count;
+
+ return msix_count;
+}
+
+/**
+ * ixgbe_insert_mac_addr_generic - Find a RAR for this mac address
+ * @hw: pointer to hardware structure
+ * @addr: Address to put into receive address register
+ * @vmdq: VMDq pool to assign
+ *
+ * Puts an ethernet address into a receive address register, or
+ * finds the rar that it is aleady in; adds to the pool list
+ **/
+s32 ixgbe_insert_mac_addr_generic(struct ixgbe_hw *hw, u8 *addr, u32 vmdq)
+{
+ static const u32 NO_EMPTY_RAR_FOUND = 0xFFFFFFFF;
+ u32 first_empty_rar = NO_EMPTY_RAR_FOUND;
+ u32 rar;
+ u32 rar_low, rar_high;
+ u32 addr_low, addr_high;
+
+ /* swap bytes for HW little endian */
+ addr_low = addr[0] | (addr[1] << 8)
+ | (addr[2] << 16)
+ | (addr[3] << 24);
+ addr_high = addr[4] | (addr[5] << 8);
+
+ /*
+ * Either find the mac_id in rar or find the first empty space.
+ * rar_highwater points to just after the highest currently used
+ * rar in order to shorten the search. It grows when we add a new
+ * rar to the top.
+ */
+ for (rar = 0; rar < hw->mac.rar_highwater; rar++) {
+ rar_high = IXGBE_READ_REG(hw, IXGBE_RAH(rar));
+
+ if (((IXGBE_RAH_AV & rar_high) == 0)
+ && first_empty_rar == NO_EMPTY_RAR_FOUND) {
+ first_empty_rar = rar;
+ } else if ((rar_high & 0xFFFF) == addr_high) {
+ rar_low = IXGBE_READ_REG(hw, IXGBE_RAL(rar));
+ if (rar_low == addr_low)
+ break; /* found it already in the rars */
+ }
+ }
+
+ if (rar < hw->mac.rar_highwater) {
+ /* already there so just add to the pool bits */
+ ixgbe_set_vmdq(hw, rar, vmdq);
+ } else if (first_empty_rar != NO_EMPTY_RAR_FOUND) {
+ /* stick it into first empty RAR slot we found */
+ rar = first_empty_rar;
+ ixgbe_set_rar(hw, rar, addr, vmdq, IXGBE_RAH_AV);
+ } else if (rar == hw->mac.rar_highwater) {
+ /* add it to the top of the list and inc the highwater mark */
+ ixgbe_set_rar(hw, rar, addr, vmdq, IXGBE_RAH_AV);
+ hw->mac.rar_highwater++;
+ } else if (rar >= hw->mac.num_rar_entries) {
+ return IXGBE_ERR_INVALID_MAC_ADDR;
+ }
+
+ /*
+ * If we found rar[0], make sure the default pool bit (we use pool 0)
+ * remains cleared to be sure default pool packets will get delivered
+ */
+ if (rar == 0)
+ ixgbe_clear_vmdq(hw, rar, 0);
+
+ return rar;
+}
+
+/**
+ * ixgbe_clear_vmdq_generic - Disassociate a VMDq pool index from a rx address
+ * @hw: pointer to hardware struct
+ * @rar: receive address register index to disassociate
+ * @vmdq: VMDq pool index to remove from the rar
+ **/
+s32 ixgbe_clear_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq)
+{
+ u32 mpsar_lo, mpsar_hi;
+ u32 rar_entries = hw->mac.num_rar_entries;
+
+ /* Make sure we are using a valid rar index range */
+ if (rar >= rar_entries) {
+ hw_dbg(hw, "RAR index %d is out of range.\n", rar);
+ return IXGBE_ERR_INVALID_ARGUMENT;
+ }
+
+ mpsar_lo = IXGBE_READ_REG(hw, IXGBE_MPSAR_LO(rar));
+ mpsar_hi = IXGBE_READ_REG(hw, IXGBE_MPSAR_HI(rar));
+
+ if (!mpsar_lo && !mpsar_hi)
+ goto done;
+
+ if (vmdq == IXGBE_CLEAR_VMDQ_ALL) {
+ if (mpsar_lo) {
+ IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(rar), 0);
+ mpsar_lo = 0;
+ }
+ if (mpsar_hi) {
+ IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(rar), 0);
+ mpsar_hi = 0;
+ }
+ } else if (vmdq < 32) {
+ mpsar_lo &= ~(1 << vmdq);
+ IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(rar), mpsar_lo);
+ } else {
+ mpsar_hi &= ~(1 << (vmdq - 32));
+ IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(rar), mpsar_hi);
+ }
+
+ /* was that the last pool using this rar? */
+ if (mpsar_lo == 0 && mpsar_hi == 0 && rar != 0)
+ hw->mac.ops.clear_rar(hw, rar);
+done:
+ return 0;
+}
+
+/**
+ * ixgbe_set_vmdq_generic - Associate a VMDq pool index with a rx address
+ * @hw: pointer to hardware struct
+ * @rar: receive address register index to associate with a VMDq index
+ * @vmdq: VMDq pool index
+ **/
+s32 ixgbe_set_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq)
+{
+ u32 mpsar;
+ u32 rar_entries = hw->mac.num_rar_entries;
+
+ /* Make sure we are using a valid rar index range */
+ if (rar >= rar_entries) {
+ hw_dbg(hw, "RAR index %d is out of range.\n", rar);
+ return IXGBE_ERR_INVALID_ARGUMENT;
+ }
+
+ if (vmdq < 32) {
+ mpsar = IXGBE_READ_REG(hw, IXGBE_MPSAR_LO(rar));
+ mpsar |= 1 << vmdq;
+ IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(rar), mpsar);
+ } else {
+ mpsar = IXGBE_READ_REG(hw, IXGBE_MPSAR_HI(rar));
+ mpsar |= 1 << (vmdq - 32);
+ IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(rar), mpsar);
+ }
+ return 0;
+}
+
+/**
+ * This function should only be involved in the IOV mode.
+ * In IOV mode, Default pool is next pool after the number of
+ * VFs advertized and not 0.
+ * MPSAR table needs to be updated for SAN_MAC RAR [hw->mac.san_mac_rar_index]
+ *
+ * ixgbe_set_vmdq_san_mac - Associate default VMDq pool index with a rx address
+ * @hw: pointer to hardware struct
+ * @vmdq: VMDq pool index
+ **/
+s32 ixgbe_set_vmdq_san_mac_generic(struct ixgbe_hw *hw, u32 vmdq)
+{
+ u32 mpsar;
+ u32 rar = hw->mac.san_mac_rar_index;
+
+ if (vmdq < 32) {
+ mpsar = IXGBE_READ_REG(hw, IXGBE_MPSAR_LO(rar));
+ mpsar |= 1 << vmdq;
+ IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(rar), mpsar);
+ } else {
+ mpsar = IXGBE_READ_REG(hw, IXGBE_MPSAR_HI(rar));
+ mpsar |= 1 << (vmdq - 32);
+ IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(rar), mpsar);
+ }
+
+ return 0;
+}
+
+/**
+ * ixgbe_init_uta_tables_generic - Initialize the Unicast Table Array
+ * @hw: pointer to hardware structure
+ **/
+s32 ixgbe_init_uta_tables_generic(struct ixgbe_hw *hw)
+{
+ int i;
+
+ hw_dbg(hw, " Clearing UTA\n");
+
+ for (i = 0; i < 128; i++)
+ IXGBE_WRITE_REG(hw, IXGBE_UTA(i), 0);
+
+ return 0;
+}
+
+/**
+ * ixgbe_find_vlvf_slot - find the vlanid or the first empty slot
+ * @hw: pointer to hardware structure
+ * @vlan: VLAN id to write to VLAN filter
+ *
+ * return the VLVF index where this VLAN id should be placed
+ *
+ **/
+s32 ixgbe_find_vlvf_slot(struct ixgbe_hw *hw, u32 vlan)
+{
+ u32 bits = 0;
+ u32 first_empty_slot = 0;
+ s32 regindex;
+
+ /* short cut the special case */
+ if (vlan == 0)
+ return 0;
+
+ /*
+ * Search for the vlan id in the VLVF entries. Save off the first empty
+ * slot found along the way
+ */
+ for (regindex = 1; regindex < IXGBE_VLVF_ENTRIES; regindex++) {
+ bits = IXGBE_READ_REG(hw, IXGBE_VLVF(regindex));
+ if (!bits && !(first_empty_slot))
+ first_empty_slot = regindex;
+ else if ((bits & 0x0FFF) == vlan)
+ break;
+ }
+
+ /*
+ * If regindex is less than IXGBE_VLVF_ENTRIES, then we found the vlan
+ * in the VLVF. Else use the first empty VLVF register for this
+ * vlan id.
+ */
+ if (regindex >= IXGBE_VLVF_ENTRIES) {
+ if (first_empty_slot)
+ regindex = first_empty_slot;
+ else {
+ hw_dbg(hw, "No space in VLVF.\n");
+ regindex = IXGBE_ERR_NO_SPACE;
+ }
+ }
+
+ return regindex;
+}
+
+/**
+ * ixgbe_set_vfta_generic - Set VLAN filter table
+ * @hw: pointer to hardware structure
+ * @vlan: VLAN id to write to VLAN filter
+ * @vind: VMDq output index that maps queue to VLAN id in VFVFB
+ * @vlan_on: boolean flag to turn on/off VLAN in VFVF
+ *
+ * Turn on/off specified VLAN in the VLAN filter table.
+ **/
+s32 ixgbe_set_vfta_generic(struct ixgbe_hw *hw, u32 vlan, u32 vind,
+ bool vlan_on)
+{
+ s32 regindex;
+ u32 bitindex;
+ u32 vfta;
+ u32 targetbit;
+ s32 ret_val = 0;
+ bool vfta_changed = false;
+
+ if (vlan > 4095)
+ return IXGBE_ERR_PARAM;
+
+ /*
+ * this is a 2 part operation - first the VFTA, then the
+ * VLVF and VLVFB if VT Mode is set
+ * We don't write the VFTA until we know the VLVF part succeeded.
+ */
+
+ /* Part 1
+ * The VFTA is a bitstring made up of 128 32-bit registers
+ * that enable the particular VLAN id, much like the MTA:
+ * bits[11-5]: which register
+ * bits[4-0]: which bit in the register
+ */
+ regindex = (vlan >> 5) & 0x7F;
+ bitindex = vlan & 0x1F;
+ targetbit = (1 << bitindex);
+ vfta = IXGBE_READ_REG(hw, IXGBE_VFTA(regindex));
+
+ if (vlan_on) {
+ if (!(vfta & targetbit)) {
+ vfta |= targetbit;
+ vfta_changed = true;
+ }
+ } else {
+ if ((vfta & targetbit)) {
+ vfta &= ~targetbit;
+ vfta_changed = true;
+ }
+ }
+
+ /* Part 2
+ * Call ixgbe_set_vlvf_generic to set VLVFB and VLVF
+ */
+ ret_val = ixgbe_set_vlvf_generic(hw, vlan, vind, vlan_on,
+ &vfta_changed);
+ if (ret_val != 0)
+ return ret_val;
+
+ if (vfta_changed)
+ IXGBE_WRITE_REG(hw, IXGBE_VFTA(regindex), vfta);
+
+ return 0;
+}
+
+/**
+ * ixgbe_set_vlvf_generic - Set VLAN Pool Filter
+ * @hw: pointer to hardware structure
+ * @vlan: VLAN id to write to VLAN filter
+ * @vind: VMDq output index that maps queue to VLAN id in VFVFB
+ * @vlan_on: boolean flag to turn on/off VLAN in VFVF
+ * @vfta_changed: pointer to boolean flag which indicates whether VFTA
+ * should be changed
+ *
+ * Turn on/off specified bit in VLVF table.
+ **/
+s32 ixgbe_set_vlvf_generic(struct ixgbe_hw *hw, u32 vlan, u32 vind,
+ bool vlan_on, bool *vfta_changed)
+{
+ u32 vt;
+
+ if (vlan > 4095)
+ return IXGBE_ERR_PARAM;
+
+ /* If VT Mode is set
+ * Either vlan_on
+ * make sure the vlan is in VLVF
+ * set the vind bit in the matching VLVFB
+ * Or !vlan_on
+ * clear the pool bit and possibly the vind
+ */
+ vt = IXGBE_READ_REG(hw, IXGBE_VT_CTL);
+ if (vt & IXGBE_VT_CTL_VT_ENABLE) {
+ s32 vlvf_index;
+ u32 bits;
+
+ vlvf_index = ixgbe_find_vlvf_slot(hw, vlan);
+ if (vlvf_index < 0)
+ return vlvf_index;
+
+ if (vlan_on) {
+ /* set the pool bit */
+ if (vind < 32) {
+ bits = IXGBE_READ_REG(hw,
+ IXGBE_VLVFB(vlvf_index * 2));
+ bits |= (1 << vind);
+ IXGBE_WRITE_REG(hw,
+ IXGBE_VLVFB(vlvf_index * 2),
+ bits);
+ } else {
+ bits = IXGBE_READ_REG(hw,
+ IXGBE_VLVFB((vlvf_index * 2) + 1));
+ bits |= (1 << (vind - 32));
+ IXGBE_WRITE_REG(hw,
+ IXGBE_VLVFB((vlvf_index * 2) + 1),
+ bits);
+ }
+ } else {
+ /* clear the pool bit */
+ if (vind < 32) {
+ bits = IXGBE_READ_REG(hw,
+ IXGBE_VLVFB(vlvf_index * 2));
+ bits &= ~(1 << vind);
+ IXGBE_WRITE_REG(hw,
+ IXGBE_VLVFB(vlvf_index * 2),
+ bits);
+ bits |= IXGBE_READ_REG(hw,
+ IXGBE_VLVFB((vlvf_index * 2) + 1));
+ } else {
+ bits = IXGBE_READ_REG(hw,
+ IXGBE_VLVFB((vlvf_index * 2) + 1));
+ bits &= ~(1 << (vind - 32));
+ IXGBE_WRITE_REG(hw,
+ IXGBE_VLVFB((vlvf_index * 2) + 1),
+ bits);
+ bits |= IXGBE_READ_REG(hw,
+ IXGBE_VLVFB(vlvf_index * 2));
+ }
+ }
+
+ /*
+ * If there are still bits set in the VLVFB registers
+ * for the VLAN ID indicated we need to see if the
+ * caller is requesting that we clear the VFTA entry bit.
+ * If the caller has requested that we clear the VFTA
+ * entry bit but there are still pools/VFs using this VLAN
+ * ID entry then ignore the request. We're not worried
+ * about the case where we're turning the VFTA VLAN ID
+ * entry bit on, only when requested to turn it off as
+ * there may be multiple pools and/or VFs using the
+ * VLAN ID entry. In that case we cannot clear the
+ * VFTA bit until all pools/VFs using that VLAN ID have also
+ * been cleared. This will be indicated by "bits" being
+ * zero.
+ */
+ if (bits) {
+ IXGBE_WRITE_REG(hw, IXGBE_VLVF(vlvf_index),
+ (IXGBE_VLVF_VIEN | vlan));
+ if ((!vlan_on) && (vfta_changed != NULL)) {
+ /* someone wants to clear the vfta entry
+ * but some pools/VFs are still using it.
+ * Ignore it. */
+ *vfta_changed = false;
+ }
+ } else
+ IXGBE_WRITE_REG(hw, IXGBE_VLVF(vlvf_index), 0);
+ }
+
+ return 0;
+}
+
+/**
+ * ixgbe_clear_vfta_generic - Clear VLAN filter table
+ * @hw: pointer to hardware structure
+ *
+ * Clears the VLAN filer table, and the VMDq index associated with the filter
+ **/
+s32 ixgbe_clear_vfta_generic(struct ixgbe_hw *hw)
+{
+ u32 offset;
+
+ for (offset = 0; offset < hw->mac.vft_size; offset++)
+ IXGBE_WRITE_REG(hw, IXGBE_VFTA(offset), 0);
+
+ for (offset = 0; offset < IXGBE_VLVF_ENTRIES; offset++) {
+ IXGBE_WRITE_REG(hw, IXGBE_VLVF(offset), 0);
+ IXGBE_WRITE_REG(hw, IXGBE_VLVFB(offset * 2), 0);
+ IXGBE_WRITE_REG(hw, IXGBE_VLVFB((offset * 2) + 1), 0);
+ }
+
+ return 0;
+}
+
+/**
+ * ixgbe_check_mac_link_generic - Determine link and speed status
+ * @hw: pointer to hardware structure
+ * @speed: pointer to link speed
+ * @link_up: true when link is up
+ * @link_up_wait_to_complete: bool used to wait for link up or not
+ *
+ * Reads the links register to determine if link is up and the current speed
+ **/
+s32 ixgbe_check_mac_link_generic(struct ixgbe_hw *hw, ixgbe_link_speed *speed,
+ bool *link_up, bool link_up_wait_to_complete)
+{
+ u32 links_reg, links_orig;
+ u32 i;
+
+ /* clear the old state */
+ links_orig = IXGBE_READ_REG(hw, IXGBE_LINKS);
+
+ links_reg = IXGBE_READ_REG(hw, IXGBE_LINKS);
+
+ if (links_orig != links_reg) {
+ hw_dbg(hw, "LINKS changed from %08X to %08X\n",
+ links_orig, links_reg);
+ }
+
+ if (link_up_wait_to_complete) {
+ for (i = 0; i < IXGBE_LINK_UP_TIME; i++) {
+ if (links_reg & IXGBE_LINKS_UP) {
+ *link_up = true;
+ break;
+ } else {
+ *link_up = false;
+ }
+ msleep(100);
+ links_reg = IXGBE_READ_REG(hw, IXGBE_LINKS);
+ }
+ } else {
+ if (links_reg & IXGBE_LINKS_UP)
+ *link_up = true;
+ else
+ *link_up = false;
+ }
+
+ if ((links_reg & IXGBE_LINKS_SPEED_82599) ==
+ IXGBE_LINKS_SPEED_10G_82599)
+ *speed = IXGBE_LINK_SPEED_10GB_FULL;
+ else if ((links_reg & IXGBE_LINKS_SPEED_82599) ==
+ IXGBE_LINKS_SPEED_1G_82599)
+ *speed = IXGBE_LINK_SPEED_1GB_FULL;
+ else if ((links_reg & IXGBE_LINKS_SPEED_82599) ==
+ IXGBE_LINKS_SPEED_100_82599)
+ *speed = IXGBE_LINK_SPEED_100_FULL;
+ else
+ *speed = IXGBE_LINK_SPEED_UNKNOWN;
+
+ return 0;
+}
+
+/**
+ * ixgbe_get_wwn_prefix_generic - Get alternative WWNN/WWPN prefix from
+ * the EEPROM
+ * @hw: pointer to hardware structure
+ * @wwnn_prefix: the alternative WWNN prefix
+ * @wwpn_prefix: the alternative WWPN prefix
+ *
+ * This function will read the EEPROM from the alternative SAN MAC address
+ * block to check the support for the alternative WWNN/WWPN prefix support.
+ **/
+s32 ixgbe_get_wwn_prefix_generic(struct ixgbe_hw *hw, u16 *wwnn_prefix,
+ u16 *wwpn_prefix)
+{
+ u16 offset, caps;
+ u16 alt_san_mac_blk_offset;
+
+ /* clear output first */
+ *wwnn_prefix = 0xFFFF;
+ *wwpn_prefix = 0xFFFF;
+
+ /* check if alternative SAN MAC is supported */
+ hw->eeprom.ops.read(hw, IXGBE_ALT_SAN_MAC_ADDR_BLK_PTR,
+ &alt_san_mac_blk_offset);
+
+ if ((alt_san_mac_blk_offset == 0) ||
+ (alt_san_mac_blk_offset == 0xFFFF))
+ goto wwn_prefix_out;
+
+ /* check capability in alternative san mac address block */
+ offset = alt_san_mac_blk_offset + IXGBE_ALT_SAN_MAC_ADDR_CAPS_OFFSET;
+ hw->eeprom.ops.read(hw, offset, &caps);
+ if (!(caps & IXGBE_ALT_SAN_MAC_ADDR_CAPS_ALTWWN))
+ goto wwn_prefix_out;
+
+ /* get the corresponding prefix for WWNN/WWPN */
+ offset = alt_san_mac_blk_offset + IXGBE_ALT_SAN_MAC_ADDR_WWNN_OFFSET;
+ hw->eeprom.ops.read(hw, offset, wwnn_prefix);
+
+ offset = alt_san_mac_blk_offset + IXGBE_ALT_SAN_MAC_ADDR_WWPN_OFFSET;
+ hw->eeprom.ops.read(hw, offset, wwpn_prefix);
+
+wwn_prefix_out:
+ return 0;
+}
+
+/**
+ * ixgbe_get_fcoe_boot_status_generic - Get FCOE boot status from EEPROM
+ * @hw: pointer to hardware structure
+ * @bs: the fcoe boot status
+ *
+ * This function will read the FCOE boot status from the iSCSI FCOE block
+ **/
+s32 ixgbe_get_fcoe_boot_status_generic(struct ixgbe_hw *hw, u16 *bs)
+{
+ u16 offset, caps, flags;
+ s32 status;
+
+ /* clear output first */
+ *bs = ixgbe_fcoe_bootstatus_unavailable;
+
+ /* check if FCOE IBA block is present */
+ offset = IXGBE_FCOE_IBA_CAPS_BLK_PTR;
+ status = hw->eeprom.ops.read(hw, offset, &caps);
+ if (status != 0)
+ goto out;
+
+ if (!(caps & IXGBE_FCOE_IBA_CAPS_FCOE))
+ goto out;
+
+ /* check if iSCSI FCOE block is populated */
+ status = hw->eeprom.ops.read(hw, IXGBE_ISCSI_FCOE_BLK_PTR, &offset);
+ if (status != 0)
+ goto out;
+
+ if ((offset == 0) || (offset == 0xFFFF))
+ goto out;
+
+ /* read fcoe flags in iSCSI FCOE block */
+ offset = offset + IXGBE_ISCSI_FCOE_FLAGS_OFFSET;
+ status = hw->eeprom.ops.read(hw, offset, &flags);
+ if (status != 0)
+ goto out;
+
+ if (flags & IXGBE_ISCSI_FCOE_FLAGS_ENABLE)
+ *bs = ixgbe_fcoe_bootstatus_enabled;
+ else
+ *bs = ixgbe_fcoe_bootstatus_disabled;
+
+out:
+ return status;
+}
+
+/**
+ * ixgbe_set_mac_anti_spoofing - Enable/Disable MAC anti-spoofing
+ * @hw: pointer to hardware structure
+ * @enable: enable or disable switch for anti-spoofing
+ * @pf: Physical Function pool - do not enable anti-spoofing for the PF
+ *
+ **/
+void ixgbe_set_mac_anti_spoofing(struct ixgbe_hw *hw, bool enable, int pf)
+{
+ int j;
+ int pf_target_reg = pf >> 3;
+ int pf_target_shift = pf % 8;
+ u32 pfvfspoof = 0;
+
+ if (hw->mac.type == ixgbe_mac_82598EB)
+ return;
+
+ if (enable)
+ pfvfspoof = IXGBE_SPOOF_MACAS_MASK;
+
+ /*
+ * PFVFSPOOF register array is size 8 with 8 bits assigned to
+ * MAC anti-spoof enables in each register array element.
+ */
+ for (j = 0; j < IXGBE_PFVFSPOOF_REG_COUNT; j++)
+ IXGBE_WRITE_REG(hw, IXGBE_PFVFSPOOF(j), pfvfspoof);
+
+ /* If not enabling anti-spoofing then done */
+ if (!enable)
+ return;
+
+ /*
+ * The PF should be allowed to spoof so that it can support
+ * emulation mode NICs. Reset the bit assigned to the PF
+ */
+ pfvfspoof = IXGBE_READ_REG(hw, IXGBE_PFVFSPOOF(pf_target_reg));
+ pfvfspoof ^= (1 << pf_target_shift);
+ IXGBE_WRITE_REG(hw, IXGBE_PFVFSPOOF(pf_target_reg), pfvfspoof);
+}
+
+/**
+ * ixgbe_set_vlan_anti_spoofing - Enable/Disable VLAN anti-spoofing
+ * @hw: pointer to hardware structure
+ * @enable: enable or disable switch for VLAN anti-spoofing
+ * @pf: Virtual Function pool - VF Pool to set for VLAN anti-spoofing
+ *
+ **/
+void ixgbe_set_vlan_anti_spoofing(struct ixgbe_hw *hw, bool enable, int vf)
+{
+ int vf_target_reg = vf >> 3;
+ int vf_target_shift = vf % 8 + IXGBE_SPOOF_VLANAS_SHIFT;
+ u32 pfvfspoof;
+
+ if (hw->mac.type == ixgbe_mac_82598EB)
+ return;
+
+ pfvfspoof = IXGBE_READ_REG(hw, IXGBE_PFVFSPOOF(vf_target_reg));
+ if (enable)
+ pfvfspoof |= (1 << vf_target_shift);
+ else
+ pfvfspoof &= ~(1 << vf_target_shift);
+ IXGBE_WRITE_REG(hw, IXGBE_PFVFSPOOF(vf_target_reg), pfvfspoof);
+}
+
+/**
+ * ixgbe_get_device_caps_generic - Get additional device capabilities
+ * @hw: pointer to hardware structure
+ * @device_caps: the EEPROM word with the extra device capabilities
+ *
+ * This function will read the EEPROM location for the device capabilities,
+ * and return the word through device_caps.
+ **/
+s32 ixgbe_get_device_caps_generic(struct ixgbe_hw *hw, u16 *device_caps)
+{
+ hw->eeprom.ops.read(hw, IXGBE_DEVICE_CAPS, device_caps);
+
+ return 0;
+}
+
+/**
+ * ixgbe_calculate_checksum - Calculate checksum for buffer
+ * @buffer: pointer to EEPROM
+ * @length: size of EEPROM to calculate a checksum for
+ * Calculates the checksum for some buffer on a specified length. The
+ * checksum calculated is returned.
+ **/
+static u8 ixgbe_calculate_checksum(u8 *buffer, u32 length)
+{
+ u32 i;
+ u8 sum = 0;
+
+ if (!buffer)
+ return 0;
+ for (i = 0; i < length; i++)
+ sum += buffer[i];
+
+ return (u8) (0 - sum);
+}
+
+/**
+ * ixgbe_host_interface_command - Issue command to manageability block
+ * @hw: pointer to the HW structure
+ * @buffer: contains the command to write and where the return status will
+ * be placed
+ * @length: length of buffer, must be multiple of 4 bytes
+ *
+ * Communicates with the manageability block. On success return 0
+ * else return IXGBE_ERR_HOST_INTERFACE_COMMAND.
+ **/
+static s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, u32 *buffer,
+ u32 length)
+{
+ u32 hicr, i, bi;
+ u32 hdr_size = sizeof(struct ixgbe_hic_hdr);
+ u8 buf_len, dword_len;
+
+ s32 ret_val = 0;
+
+ if (length == 0 || length & 0x3 ||
+ length > IXGBE_HI_MAX_BLOCK_BYTE_LENGTH) {
+ hw_dbg(hw, "Buffer length failure.\n");
+ ret_val = IXGBE_ERR_HOST_INTERFACE_COMMAND;
+ goto out;
+ }
+
+ /* Check that the host interface is enabled. */
+ hicr = IXGBE_READ_REG(hw, IXGBE_HICR);
+ if ((hicr & IXGBE_HICR_EN) == 0) {
+ hw_dbg(hw, "IXGBE_HOST_EN bit disabled.\n");
+ ret_val = IXGBE_ERR_HOST_INTERFACE_COMMAND;
+ goto out;
+ }
+
+ /* Calculate length in DWORDs */
+ dword_len = length >> 2;
+
+ /*
+ * The device driver writes the relevant command block
+ * into the ram area.
+ */
+ for (i = 0; i < dword_len; i++)
+ IXGBE_WRITE_REG_ARRAY(hw, IXGBE_FLEX_MNG,
+ i, IXGBE_CPU_TO_LE32(buffer[i]));
+
+ /* Setting this bit tells the ARC that a new command is pending. */
+ IXGBE_WRITE_REG(hw, IXGBE_HICR, hicr | IXGBE_HICR_C);
+
+ for (i = 0; i < IXGBE_HI_COMMAND_TIMEOUT; i++) {
+ hicr = IXGBE_READ_REG(hw, IXGBE_HICR);
+ if (!(hicr & IXGBE_HICR_C))
+ break;
+ msleep(1);
+ }
+
+ /* Check command successful completion. */
+ if (i == IXGBE_HI_COMMAND_TIMEOUT ||
+ (!(IXGBE_READ_REG(hw, IXGBE_HICR) & IXGBE_HICR_SV))) {
+ hw_dbg(hw, "Command has failed with no status valid.\n");
+ ret_val = IXGBE_ERR_HOST_INTERFACE_COMMAND;
+ goto out;
+ }
+
+ /* Calculate length in DWORDs */
+ dword_len = hdr_size >> 2;
+
+ /* first pull in the header so we know the buffer length */
+ for (bi = 0; bi < dword_len; bi++) {
+ buffer[bi] = IXGBE_READ_REG_ARRAY(hw, IXGBE_FLEX_MNG, bi);
+ IXGBE_LE32_TO_CPUS(&buffer[bi]);
+ }
+
+ /* If there is any thing in data position pull it in */
+ buf_len = ((struct ixgbe_hic_hdr *)buffer)->buf_len;
+ if (buf_len == 0)
+ goto out;
+
+ if (length < (buf_len + hdr_size)) {
+ hw_dbg(hw, "Buffer not large enough for reply message.\n");
+ ret_val = IXGBE_ERR_HOST_INTERFACE_COMMAND;
+ goto out;
+ }
+
+ /* Calculate length in DWORDs, add 3 for odd lengths */
+ dword_len = (buf_len + 3) >> 2;
+
+ /* Pull in the rest of the buffer (bi is where we left off)*/
+ for (; bi <= dword_len; bi++) {
+ buffer[bi] = IXGBE_READ_REG_ARRAY(hw, IXGBE_FLEX_MNG, bi);
+ IXGBE_LE32_TO_CPUS(&buffer[bi]);
+ }
+
+out:
+ return ret_val;
+}
+
+/**
+ * ixgbe_set_fw_drv_ver_generic - Sends driver version to firmware
+ * @hw: pointer to the HW structure
+ * @maj: driver version major number
+ * @min: driver version minor number
+ * @build: driver version build number
+ * @sub: driver version sub build number
+ *
+ * Sends driver version number to firmware through the manageability
+ * block. On success return 0
+ * else returns IXGBE_ERR_SWFW_SYNC when encountering an error acquiring
+ * semaphore or IXGBE_ERR_HOST_INTERFACE_COMMAND when command fails.
+ **/
+s32 ixgbe_set_fw_drv_ver_generic(struct ixgbe_hw *hw, u8 maj, u8 min,
+ u8 build, u8 sub)
+{
+ struct ixgbe_hic_drv_info fw_cmd;
+ int i;
+ s32 ret_val = 0;
+
+ if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_SW_MNG_SM)
+ != 0) {
+ ret_val = IXGBE_ERR_SWFW_SYNC;
+ goto out;
+ }
+
+ fw_cmd.hdr.cmd = FW_CEM_CMD_DRIVER_INFO;
+ fw_cmd.hdr.buf_len = FW_CEM_CMD_DRIVER_INFO_LEN;
+ fw_cmd.hdr.cmd_or_resp.cmd_resv = FW_CEM_CMD_RESERVED;
+ fw_cmd.port_num = (u8)hw->bus.func;
+ fw_cmd.ver_maj = maj;
+ fw_cmd.ver_min = min;
+ fw_cmd.ver_build = build;
+ fw_cmd.ver_sub = sub;
+ fw_cmd.hdr.checksum = 0;
+ fw_cmd.hdr.checksum = ixgbe_calculate_checksum((u8 *)&fw_cmd,
+ (FW_CEM_HDR_LEN + fw_cmd.hdr.buf_len));
+ fw_cmd.pad = 0;
+ fw_cmd.pad2 = 0;
+
+ for (i = 0; i <= FW_CEM_MAX_RETRIES; i++) {
+ ret_val = ixgbe_host_interface_command(hw, (u32 *)&fw_cmd,
+ sizeof(fw_cmd));
+ if (ret_val != 0)
+ continue;
+
+ if (fw_cmd.hdr.cmd_or_resp.ret_status ==
+ FW_CEM_RESP_STATUS_SUCCESS)
+ ret_val = 0;
+ else
+ ret_val = IXGBE_ERR_HOST_INTERFACE_COMMAND;
+
+ break;
+ }
+
+ hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_SW_MNG_SM);
+out:
+ return ret_val;
+}
+
+/**
+ * ixgbe_set_rxpba_generic - Initialize Rx packet buffer
+ * @hw: pointer to hardware structure
+ * @num_pb: number of packet buffers to allocate
+ * @headroom: reserve n KB of headroom
+ * @strategy: packet buffer allocation strategy
+ **/
+void ixgbe_set_rxpba_generic(struct ixgbe_hw *hw, int num_pb, u32 headroom,
+ int strategy)
+{
+ u32 pbsize = hw->mac.rx_pb_size;
+ int i = 0;
+ u32 rxpktsize, txpktsize, txpbthresh;
+
+ /* Reserve headroom */
+ pbsize -= headroom;
+
+ if (!num_pb)
+ num_pb = 1;
+
+ /* Divide remaining packet buffer space amongst the number of packet
+ * buffers requested using supplied strategy.
+ */
+ switch (strategy) {
+ case PBA_STRATEGY_WEIGHTED:
+ /* ixgbe_dcb_pba_80_48 strategy weight first half of packet
+ * buffer with 5/8 of the packet buffer space.
+ */
+ rxpktsize = (pbsize * 5) / (num_pb * 4);
+ pbsize -= rxpktsize * (num_pb / 2);
+ rxpktsize <<= IXGBE_RXPBSIZE_SHIFT;
+ for (; i < (num_pb / 2); i++)
+ IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpktsize);
+ /* Fall through to configure remaining packet buffers */
+ case PBA_STRATEGY_EQUAL:
+ rxpktsize = (pbsize / (num_pb - i)) << IXGBE_RXPBSIZE_SHIFT;
+ for (; i < num_pb; i++)
+ IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpktsize);
+ break;
+ default:
+ break;
+ }
+
+ /* Only support an equally distributed Tx packet buffer strategy. */
+ txpktsize = IXGBE_TXPBSIZE_MAX / num_pb;
+ txpbthresh = (txpktsize / 1024) - IXGBE_TXPKT_SIZE_MAX;
+ for (i = 0; i < num_pb; i++) {
+ IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
+ IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
+ }
+
+ /* Clear unused TCs, if any, to zero buffer size*/
+ for (; i < IXGBE_MAX_PB; i++) {
+ IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
+ IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
+ IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
+ }
+}
+
+/**
+ * ixgbe_clear_tx_pending - Clear pending TX work from the PCIe fifo
+ * @hw: pointer to the hardware structure
+ *
+ * The 82599 and x540 MACs can experience issues if TX work is still pending
+ * when a reset occurs. This function prevents this by flushing the PCIe
+ * buffers on the system.
+ **/
+void ixgbe_clear_tx_pending(struct ixgbe_hw *hw)
+{
+ u32 gcr_ext, hlreg0;
+
+ /*
+ * If double reset is not requested then all transactions should
+ * already be clear and as such there is no work to do
+ */
+ if (!(hw->mac.flags & IXGBE_FLAGS_DOUBLE_RESET_REQUIRED))
+ return;
+
+ /*
+ * Set loopback enable to prevent any transmits from being sent
+ * should the link come up. This assumes that the RXCTRL.RXEN bit
+ * has already been cleared.
+ */
+ hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
+ IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0 | IXGBE_HLREG0_LPBK);
+
+ /* initiate cleaning flow for buffers in the PCIe transaction layer */
+ gcr_ext = IXGBE_READ_REG(hw, IXGBE_GCR_EXT);
+ IXGBE_WRITE_REG(hw, IXGBE_GCR_EXT,
+ gcr_ext | IXGBE_GCR_EXT_BUFFERS_CLEAR);
+
+ /* Flush all writes and allow 20usec for all transactions to clear */
+ IXGBE_WRITE_FLUSH(hw);
+ udelay(20);
+
+ /* restore previous register values */
+ IXGBE_WRITE_REG(hw, IXGBE_GCR_EXT, gcr_ext);
+ IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
+}
+
+static const u8 ixgbe_emc_temp_data[4] = {
+ IXGBE_EMC_INTERNAL_DATA,
+ IXGBE_EMC_DIODE1_DATA,
+ IXGBE_EMC_DIODE2_DATA,
+ IXGBE_EMC_DIODE3_DATA
+};
+static const u8 ixgbe_emc_therm_limit[4] = {
+ IXGBE_EMC_INTERNAL_THERM_LIMIT,
+ IXGBE_EMC_DIODE1_THERM_LIMIT,
+ IXGBE_EMC_DIODE2_THERM_LIMIT,
+ IXGBE_EMC_DIODE3_THERM_LIMIT
+};
+
+/**
+ * ixgbe_get_thermal_sensor_data - Gathers thermal sensor data
+ * @hw: pointer to hardware structure
+ * @data: pointer to the thermal sensor data structure
+ *
+ * Returns the thermal sensor data structure
+ **/
+s32 ixgbe_get_thermal_sensor_data_generic(struct ixgbe_hw *hw)
+{
+ s32 status = 0;
+ u16 ets_offset;
+ u16 ets_cfg;
+ u16 ets_sensor;
+ u8 num_sensors;
+ u8 sensor_index;
+ u8 sensor_location;
+ u8 i;
+ struct ixgbe_thermal_sensor_data *data = &hw->mac.thermal_sensor_data;
+
+ /* Only support thermal sensors attached to 82599 physical port 0 */
+ if ((hw->mac.type != ixgbe_mac_82599EB) ||
+ (IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_LAN_ID_1)) {
+ status = IXGBE_NOT_IMPLEMENTED;
+ goto out;
+ }
+
+ status = hw->eeprom.ops.read(hw, IXGBE_ETS_CFG, &ets_offset);
+ if (status)
+ goto out;
+
+ if ((ets_offset == 0x0000) || (ets_offset == 0xFFFF)) {
+ status = IXGBE_NOT_IMPLEMENTED;
+ goto out;
+ }
+
+ status = hw->eeprom.ops.read(hw, ets_offset, &ets_cfg);
+ if (status)
+ goto out;
+
+ if (((ets_cfg & IXGBE_ETS_TYPE_MASK) >> IXGBE_ETS_TYPE_SHIFT)
+ != IXGBE_ETS_TYPE_EMC) {
+ status = IXGBE_NOT_IMPLEMENTED;
+ goto out;
+ }
+
+ num_sensors = (ets_cfg & IXGBE_ETS_NUM_SENSORS_MASK);
+ if (num_sensors > IXGBE_MAX_SENSORS)
+ num_sensors = IXGBE_MAX_SENSORS;
+
+ for (i = 0; i < num_sensors; i++) {
+ status = hw->eeprom.ops.read(hw, (ets_offset + 1 + i),
+ &ets_sensor);
+ if (status)
+ goto out;
+
+ sensor_index = ((ets_sensor & IXGBE_ETS_DATA_INDEX_MASK) >>
+ IXGBE_ETS_DATA_INDEX_SHIFT);
+ sensor_location = ((ets_sensor & IXGBE_ETS_DATA_LOC_MASK) >>
+ IXGBE_ETS_DATA_LOC_SHIFT);
+
+ if (sensor_location != 0) {
+ status = hw->phy.ops.read_i2c_byte(hw,
+ ixgbe_emc_temp_data[sensor_index],
+ IXGBE_I2C_THERMAL_SENSOR_ADDR,
+ &data->sensor[i].temp);
+ if (status)
+ goto out;
+ }
+ }
+out:
+ return status;
+}
+
+/**
+ * ixgbe_init_thermal_sensor_thresh_generic - Inits thermal sensor thresholds
+ * @hw: pointer to hardware structure
+ *
+ * Inits the thermal sensor thresholds according to the NVM map
+ * and save off the threshold and location values into mac.thermal_sensor_data
+ **/
+s32 ixgbe_init_thermal_sensor_thresh_generic(struct ixgbe_hw *hw)
+{
+ s32 status = 0;
+ u16 ets_offset;
+ u16 ets_cfg;
+ u16 ets_sensor;
+ u8 low_thresh_delta;
+ u8 num_sensors;
+ u8 sensor_index;
+ u8 sensor_location;
+ u8 therm_limit;
+ u8 i;
+ struct ixgbe_thermal_sensor_data *data = &hw->mac.thermal_sensor_data;
+
+ memset(data, 0, sizeof(struct ixgbe_thermal_sensor_data));
+
+ /* Only support thermal sensors attached to 82599 physical port 0 */
+ if ((hw->mac.type != ixgbe_mac_82599EB) ||
+ (IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_LAN_ID_1))
+ return IXGBE_NOT_IMPLEMENTED;
+
+ hw->eeprom.ops.read(hw, IXGBE_ETS_CFG, &ets_offset);
+ if ((ets_offset == 0x0000) || (ets_offset == 0xFFFF))
+ return IXGBE_NOT_IMPLEMENTED;
+
+ hw->eeprom.ops.read(hw, ets_offset, &ets_cfg);
+ if (((ets_cfg & IXGBE_ETS_TYPE_MASK) >> IXGBE_ETS_TYPE_SHIFT)
+ != IXGBE_ETS_TYPE_EMC)
+ return IXGBE_NOT_IMPLEMENTED;
+
+ low_thresh_delta = ((ets_cfg & IXGBE_ETS_LTHRES_DELTA_MASK) >>
+ IXGBE_ETS_LTHRES_DELTA_SHIFT);
+ num_sensors = (ets_cfg & IXGBE_ETS_NUM_SENSORS_MASK);
+
+ for (i = 0; i < num_sensors; i++) {
+ hw->eeprom.ops.read(hw, (ets_offset + 1 + i), &ets_sensor);
+ sensor_index = ((ets_sensor & IXGBE_ETS_DATA_INDEX_MASK) >>
+ IXGBE_ETS_DATA_INDEX_SHIFT);
+ sensor_location = ((ets_sensor & IXGBE_ETS_DATA_LOC_MASK) >>
+ IXGBE_ETS_DATA_LOC_SHIFT);
+ therm_limit = ets_sensor & IXGBE_ETS_DATA_HTHRESH_MASK;
+
+ hw->phy.ops.write_i2c_byte(hw,
+ ixgbe_emc_therm_limit[sensor_index],
+ IXGBE_I2C_THERMAL_SENSOR_ADDR, therm_limit);
+
+ if ((i < IXGBE_MAX_SENSORS) && (sensor_location != 0)) {
+ data->sensor[i].location = sensor_location;
+ data->sensor[i].caution_thresh = therm_limit;
+ data->sensor[i].max_op_thresh = therm_limit -
+ low_thresh_delta;
+ }
+ }
+ return status;
+}
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.h
new file mode 100644
index 00000000..9bd6f534
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.h
@@ -0,0 +1,140 @@
+/*******************************************************************************
+
+ Intel 10 Gigabit PCI Express Linux driver
+ Copyright(c) 1999 - 2012 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _IXGBE_COMMON_H_
+#define _IXGBE_COMMON_H_
+
+#include "ixgbe_type.h"
+
+u16 ixgbe_get_pcie_msix_count_generic(struct ixgbe_hw *hw);
+
+s32 ixgbe_init_ops_generic(struct ixgbe_hw *hw);
+s32 ixgbe_init_hw_generic(struct ixgbe_hw *hw);
+s32 ixgbe_start_hw_generic(struct ixgbe_hw *hw);
+s32 ixgbe_start_hw_gen2(struct ixgbe_hw *hw);
+s32 ixgbe_clear_hw_cntrs_generic(struct ixgbe_hw *hw);
+s32 ixgbe_read_pba_string_generic(struct ixgbe_hw *hw, u8 *pba_num,
+ u32 pba_num_size);
+s32 ixgbe_get_mac_addr_generic(struct ixgbe_hw *hw, u8 *mac_addr);
+s32 ixgbe_get_bus_info_generic(struct ixgbe_hw *hw);
+void ixgbe_set_lan_id_multi_port_pcie(struct ixgbe_hw *hw);
+s32 ixgbe_stop_adapter_generic(struct ixgbe_hw *hw);
+
+s32 ixgbe_led_on_generic(struct ixgbe_hw *hw, u32 index);
+s32 ixgbe_led_off_generic(struct ixgbe_hw *hw, u32 index);
+
+s32 ixgbe_init_eeprom_params_generic(struct ixgbe_hw *hw);
+s32 ixgbe_write_eeprom_generic(struct ixgbe_hw *hw, u16 offset, u16 data);
+s32 ixgbe_write_eeprom_buffer_bit_bang_generic(struct ixgbe_hw *hw, u16 offset,
+ u16 words, u16 *data);
+s32 ixgbe_read_eerd_generic(struct ixgbe_hw *hw, u16 offset, u16 *data);
+s32 ixgbe_read_eerd_buffer_generic(struct ixgbe_hw *hw, u16 offset,
+ u16 words, u16 *data);
+s32 ixgbe_write_eewr_generic(struct ixgbe_hw *hw, u16 offset, u16 data);
+s32 ixgbe_write_eewr_buffer_generic(struct ixgbe_hw *hw, u16 offset,
+ u16 words, u16 *data);
+s32 ixgbe_read_eeprom_bit_bang_generic(struct ixgbe_hw *hw, u16 offset,
+ u16 *data);
+s32 ixgbe_read_eeprom_buffer_bit_bang_generic(struct ixgbe_hw *hw, u16 offset,
+ u16 words, u16 *data);
+u16 ixgbe_calc_eeprom_checksum_generic(struct ixgbe_hw *hw);
+s32 ixgbe_validate_eeprom_checksum_generic(struct ixgbe_hw *hw,
+ u16 *checksum_val);
+s32 ixgbe_update_eeprom_checksum_generic(struct ixgbe_hw *hw);
+s32 ixgbe_poll_eerd_eewr_done(struct ixgbe_hw *hw, u32 ee_reg);
+
+s32 ixgbe_set_rar_generic(struct ixgbe_hw *hw, u32 index, u8 *addr, u32 vmdq,
+ u32 enable_addr);
+s32 ixgbe_clear_rar_generic(struct ixgbe_hw *hw, u32 index);
+s32 ixgbe_init_rx_addrs_generic(struct ixgbe_hw *hw);
+s32 ixgbe_update_mc_addr_list_generic(struct ixgbe_hw *hw, u8 *mc_addr_list,
+ u32 mc_addr_count,
+ ixgbe_mc_addr_itr func, bool clear);
+s32 ixgbe_update_uc_addr_list_generic(struct ixgbe_hw *hw, u8 *addr_list,
+ u32 addr_count, ixgbe_mc_addr_itr func);
+s32 ixgbe_enable_mc_generic(struct ixgbe_hw *hw);
+s32 ixgbe_disable_mc_generic(struct ixgbe_hw *hw);
+s32 ixgbe_enable_rx_dma_generic(struct ixgbe_hw *hw, u32 regval);
+s32 ixgbe_disable_sec_rx_path_generic(struct ixgbe_hw *hw);
+s32 ixgbe_enable_sec_rx_path_generic(struct ixgbe_hw *hw);
+
+s32 ixgbe_fc_enable_generic(struct ixgbe_hw *hw);
+void ixgbe_fc_autoneg(struct ixgbe_hw *hw);
+
+s32 ixgbe_validate_mac_addr(u8 *mac_addr);
+s32 ixgbe_acquire_swfw_sync(struct ixgbe_hw *hw, u16 mask);
+void ixgbe_release_swfw_sync(struct ixgbe_hw *hw, u16 mask);
+s32 ixgbe_disable_pcie_master(struct ixgbe_hw *hw);
+
+s32 ixgbe_blink_led_start_generic(struct ixgbe_hw *hw, u32 index);
+s32 ixgbe_blink_led_stop_generic(struct ixgbe_hw *hw, u32 index);
+
+s32 ixgbe_get_san_mac_addr_generic(struct ixgbe_hw *hw, u8 *san_mac_addr);
+s32 ixgbe_set_san_mac_addr_generic(struct ixgbe_hw *hw, u8 *san_mac_addr);
+
+s32 ixgbe_set_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq);
+s32 ixgbe_set_vmdq_san_mac_generic(struct ixgbe_hw *hw, u32 vmdq);
+s32 ixgbe_clear_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq);
+s32 ixgbe_insert_mac_addr_generic(struct ixgbe_hw *hw, u8 *addr, u32 vmdq);
+s32 ixgbe_init_uta_tables_generic(struct ixgbe_hw *hw);
+s32 ixgbe_set_vfta_generic(struct ixgbe_hw *hw, u32 vlan,
+ u32 vind, bool vlan_on);
+s32 ixgbe_set_vlvf_generic(struct ixgbe_hw *hw, u32 vlan, u32 vind,
+ bool vlan_on, bool *vfta_changed);
+s32 ixgbe_clear_vfta_generic(struct ixgbe_hw *hw);
+s32 ixgbe_find_vlvf_slot(struct ixgbe_hw *hw, u32 vlan);
+
+s32 ixgbe_check_mac_link_generic(struct ixgbe_hw *hw,
+ ixgbe_link_speed *speed,
+ bool *link_up, bool link_up_wait_to_complete);
+
+s32 ixgbe_get_wwn_prefix_generic(struct ixgbe_hw *hw, u16 *wwnn_prefix,
+ u16 *wwpn_prefix);
+
+s32 ixgbe_get_fcoe_boot_status_generic(struct ixgbe_hw *hw, u16 *bs);
+void ixgbe_set_mac_anti_spoofing(struct ixgbe_hw *hw, bool enable, int pf);
+void ixgbe_set_vlan_anti_spoofing(struct ixgbe_hw *hw, bool enable, int vf);
+s32 ixgbe_get_device_caps_generic(struct ixgbe_hw *hw, u16 *device_caps);
+void ixgbe_set_rxpba_generic(struct ixgbe_hw *hw, int num_pb, u32 headroom,
+ int strategy);
+s32 ixgbe_set_fw_drv_ver_generic(struct ixgbe_hw *hw, u8 maj, u8 min,
+ u8 build, u8 ver);
+void ixgbe_clear_tx_pending(struct ixgbe_hw *hw);
+
+#define IXGBE_I2C_THERMAL_SENSOR_ADDR 0xF8
+#define IXGBE_EMC_INTERNAL_DATA 0x00
+#define IXGBE_EMC_INTERNAL_THERM_LIMIT 0x20
+#define IXGBE_EMC_DIODE1_DATA 0x01
+#define IXGBE_EMC_DIODE1_THERM_LIMIT 0x19
+#define IXGBE_EMC_DIODE2_DATA 0x23
+#define IXGBE_EMC_DIODE2_THERM_LIMIT 0x1A
+#define IXGBE_EMC_DIODE3_DATA 0x2A
+#define IXGBE_EMC_DIODE3_THERM_LIMIT 0x30
+
+s32 ixgbe_get_thermal_sensor_data_generic(struct ixgbe_hw *hw);
+s32 ixgbe_init_thermal_sensor_thresh_generic(struct ixgbe_hw *hw);
+#endif /* IXGBE_COMMON */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_dcb.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_dcb.h
new file mode 100644
index 00000000..a6690451
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_dcb.h
@@ -0,0 +1,168 @@
+/*******************************************************************************
+
+ Intel 10 Gigabit PCI Express Linux driver
+ Copyright(c) 1999 - 2012 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _IXGBE_DCB_H_
+#define _IXGBE_DCB_H_
+
+
+#include "ixgbe_type.h"
+
+/* DCB defines */
+/* DCB credit calculation defines */
+#define IXGBE_DCB_CREDIT_QUANTUM 64
+#define IXGBE_DCB_MAX_CREDIT_REFILL 200 /* 200 * 64B = 12800B */
+#define IXGBE_DCB_MAX_TSO_SIZE (32 * 1024) /* Max TSO pkt size in DCB*/
+#define IXGBE_DCB_MAX_CREDIT (2 * IXGBE_DCB_MAX_CREDIT_REFILL)
+
+/* 513 for 32KB TSO packet */
+#define IXGBE_DCB_MIN_TSO_CREDIT \
+ ((IXGBE_DCB_MAX_TSO_SIZE / IXGBE_DCB_CREDIT_QUANTUM) + 1)
+
+/* DCB configuration defines */
+#define IXGBE_DCB_MAX_USER_PRIORITY 8
+#define IXGBE_DCB_MAX_BW_GROUP 8
+#define IXGBE_DCB_BW_PERCENT 100
+
+#define IXGBE_DCB_TX_CONFIG 0
+#define IXGBE_DCB_RX_CONFIG 1
+
+/* DCB capability defines */
+#define IXGBE_DCB_PG_SUPPORT 0x00000001
+#define IXGBE_DCB_PFC_SUPPORT 0x00000002
+#define IXGBE_DCB_BCN_SUPPORT 0x00000004
+#define IXGBE_DCB_UP2TC_SUPPORT 0x00000008
+#define IXGBE_DCB_GSP_SUPPORT 0x00000010
+
+struct ixgbe_dcb_support {
+ u32 capabilities; /* DCB capabilities */
+
+ /* Each bit represents a number of TCs configurable in the hw.
+ * If 8 traffic classes can be configured, the value is 0x80. */
+ u8 traffic_classes;
+ u8 pfc_traffic_classes;
+};
+
+enum ixgbe_dcb_tsa {
+ ixgbe_dcb_tsa_ets = 0,
+ ixgbe_dcb_tsa_group_strict_cee,
+ ixgbe_dcb_tsa_strict
+};
+
+/* Traffic class bandwidth allocation per direction */
+struct ixgbe_dcb_tc_path {
+ u8 bwg_id; /* Bandwidth Group (BWG) ID */
+ u8 bwg_percent; /* % of BWG's bandwidth */
+ u8 link_percent; /* % of link bandwidth */
+ u8 up_to_tc_bitmap; /* User Priority to Traffic Class mapping */
+ u16 data_credits_refill; /* Credit refill amount in 64B granularity */
+ u16 data_credits_max; /* Max credits for a configured packet buffer
+ * in 64B granularity.*/
+ enum ixgbe_dcb_tsa tsa; /* Link or Group Strict Priority */
+};
+
+enum ixgbe_dcb_pfc {
+ ixgbe_dcb_pfc_disabled = 0,
+ ixgbe_dcb_pfc_enabled,
+ ixgbe_dcb_pfc_enabled_txonly,
+ ixgbe_dcb_pfc_enabled_rxonly
+};
+
+/* Traffic class configuration */
+struct ixgbe_dcb_tc_config {
+ struct ixgbe_dcb_tc_path path[2]; /* One each for Tx/Rx */
+ enum ixgbe_dcb_pfc pfc; /* Class based flow control setting */
+
+ u16 desc_credits_max; /* For Tx Descriptor arbitration */
+ u8 tc; /* Traffic class (TC) */
+};
+
+enum ixgbe_dcb_pba {
+ /* PBA[0-7] each use 64KB FIFO */
+ ixgbe_dcb_pba_equal = PBA_STRATEGY_EQUAL,
+ /* PBA[0-3] each use 80KB, PBA[4-7] each use 48KB */
+ ixgbe_dcb_pba_80_48 = PBA_STRATEGY_WEIGHTED
+};
+
+struct ixgbe_dcb_num_tcs {
+ u8 pg_tcs;
+ u8 pfc_tcs;
+};
+
+struct ixgbe_dcb_config {
+ struct ixgbe_dcb_tc_config tc_config[IXGBE_DCB_MAX_TRAFFIC_CLASS];
+ struct ixgbe_dcb_support support;
+ struct ixgbe_dcb_num_tcs num_tcs;
+ u8 bw_percentage[2][IXGBE_DCB_MAX_BW_GROUP]; /* One each for Tx/Rx */
+ bool pfc_mode_enable;
+ bool round_robin_enable;
+
+ enum ixgbe_dcb_pba rx_pba_cfg;
+
+ u32 dcb_cfg_version; /* Not used...OS-specific? */
+ u32 link_speed; /* For bandwidth allocation validation purpose */
+ bool vt_mode;
+};
+
+/* DCB driver APIs */
+
+/* DCB rule checking */
+s32 ixgbe_dcb_check_config_cee(struct ixgbe_dcb_config *);
+
+/* DCB credits calculation */
+s32 ixgbe_dcb_calculate_tc_credits(u8 *, u16 *, u16 *, int);
+s32 ixgbe_dcb_calculate_tc_credits_cee(struct ixgbe_hw *,
+ struct ixgbe_dcb_config *, u32, u8);
+
+/* DCB PFC */
+s32 ixgbe_dcb_config_pfc(struct ixgbe_hw *, u8, u8 *);
+s32 ixgbe_dcb_config_pfc_cee(struct ixgbe_hw *, struct ixgbe_dcb_config *);
+
+/* DCB stats */
+s32 ixgbe_dcb_config_tc_stats(struct ixgbe_hw *);
+s32 ixgbe_dcb_get_tc_stats(struct ixgbe_hw *, struct ixgbe_hw_stats *, u8);
+s32 ixgbe_dcb_get_pfc_stats(struct ixgbe_hw *, struct ixgbe_hw_stats *, u8);
+
+/* DCB config arbiters */
+s32 ixgbe_dcb_config_tx_desc_arbiter_cee(struct ixgbe_hw *,
+ struct ixgbe_dcb_config *);
+s32 ixgbe_dcb_config_tx_data_arbiter_cee(struct ixgbe_hw *,
+ struct ixgbe_dcb_config *);
+s32 ixgbe_dcb_config_rx_arbiter_cee(struct ixgbe_hw *,
+ struct ixgbe_dcb_config *);
+
+/* DCB unpack routines */
+void ixgbe_dcb_unpack_pfc_cee(struct ixgbe_dcb_config *, u8 *, u8 *);
+void ixgbe_dcb_unpack_refill_cee(struct ixgbe_dcb_config *, int, u16 *);
+void ixgbe_dcb_unpack_max_cee(struct ixgbe_dcb_config *, u16 *);
+void ixgbe_dcb_unpack_bwgid_cee(struct ixgbe_dcb_config *, int, u8 *);
+void ixgbe_dcb_unpack_tsa_cee(struct ixgbe_dcb_config *, int, u8 *);
+void ixgbe_dcb_unpack_map_cee(struct ixgbe_dcb_config *, int, u8 *);
+
+/* DCB initialization */
+s32 ixgbe_dcb_hw_config(struct ixgbe_hw *, u16 *, u16 *, u8 *, u8 *, u8 *);
+s32 ixgbe_dcb_hw_config_cee(struct ixgbe_hw *, struct ixgbe_dcb_config *);
+#endif /* _IXGBE_DCB_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c
new file mode 100644
index 00000000..11472bd3
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c
@@ -0,0 +1,2901 @@
+/*******************************************************************************
+
+ Intel 10 Gigabit PCI Express Linux driver
+ Copyright(c) 1999 - 2012 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+/* ethtool support for ixgbe */
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/ethtool.h>
+#include <linux/vmalloc.h>
+#include <linux/highmem.h>
+#ifdef SIOCETHTOOL
+#include <asm/uaccess.h>
+
+#include "ixgbe.h"
+
+#ifndef ETH_GSTRING_LEN
+#define ETH_GSTRING_LEN 32
+#endif
+
+#define IXGBE_ALL_RAR_ENTRIES 16
+
+#ifdef ETHTOOL_OPS_COMPAT
+#include "kcompat_ethtool.c"
+#endif
+#ifdef ETHTOOL_GSTATS
+struct ixgbe_stats {
+ char stat_string[ETH_GSTRING_LEN];
+ int sizeof_stat;
+ int stat_offset;
+};
+
+#define IXGBE_NETDEV_STAT(_net_stat) { \
+ .stat_string = #_net_stat, \
+ .sizeof_stat = FIELD_SIZEOF(struct net_device_stats, _net_stat), \
+ .stat_offset = offsetof(struct net_device_stats, _net_stat) \
+}
+static const struct ixgbe_stats ixgbe_gstrings_net_stats[] = {
+ IXGBE_NETDEV_STAT(rx_packets),
+ IXGBE_NETDEV_STAT(tx_packets),
+ IXGBE_NETDEV_STAT(rx_bytes),
+ IXGBE_NETDEV_STAT(tx_bytes),
+ IXGBE_NETDEV_STAT(rx_errors),
+ IXGBE_NETDEV_STAT(tx_errors),
+ IXGBE_NETDEV_STAT(rx_dropped),
+ IXGBE_NETDEV_STAT(tx_dropped),
+ IXGBE_NETDEV_STAT(multicast),
+ IXGBE_NETDEV_STAT(collisions),
+ IXGBE_NETDEV_STAT(rx_over_errors),
+ IXGBE_NETDEV_STAT(rx_crc_errors),
+ IXGBE_NETDEV_STAT(rx_frame_errors),
+ IXGBE_NETDEV_STAT(rx_fifo_errors),
+ IXGBE_NETDEV_STAT(rx_missed_errors),
+ IXGBE_NETDEV_STAT(tx_aborted_errors),
+ IXGBE_NETDEV_STAT(tx_carrier_errors),
+ IXGBE_NETDEV_STAT(tx_fifo_errors),
+ IXGBE_NETDEV_STAT(tx_heartbeat_errors),
+};
+
+#define IXGBE_STAT(_name, _stat) { \
+ .stat_string = _name, \
+ .sizeof_stat = FIELD_SIZEOF(struct ixgbe_adapter, _stat), \
+ .stat_offset = offsetof(struct ixgbe_adapter, _stat) \
+}
+static struct ixgbe_stats ixgbe_gstrings_stats[] = {
+ IXGBE_STAT("rx_pkts_nic", stats.gprc),
+ IXGBE_STAT("tx_pkts_nic", stats.gptc),
+ IXGBE_STAT("rx_bytes_nic", stats.gorc),
+ IXGBE_STAT("tx_bytes_nic", stats.gotc),
+ IXGBE_STAT("lsc_int", lsc_int),
+ IXGBE_STAT("tx_busy", tx_busy),
+ IXGBE_STAT("non_eop_descs", non_eop_descs),
+#ifndef CONFIG_IXGBE_NAPI
+ IXGBE_STAT("rx_dropped_backlog", rx_dropped_backlog),
+#endif
+ IXGBE_STAT("broadcast", stats.bprc),
+ IXGBE_STAT("rx_no_buffer_count", stats.rnbc[0]) ,
+ IXGBE_STAT("tx_timeout_count", tx_timeout_count),
+ IXGBE_STAT("tx_restart_queue", restart_queue),
+ IXGBE_STAT("rx_long_length_errors", stats.roc),
+ IXGBE_STAT("rx_short_length_errors", stats.ruc),
+ IXGBE_STAT("tx_flow_control_xon", stats.lxontxc),
+ IXGBE_STAT("rx_flow_control_xon", stats.lxonrxc),
+ IXGBE_STAT("tx_flow_control_xoff", stats.lxofftxc),
+ IXGBE_STAT("rx_flow_control_xoff", stats.lxoffrxc),
+ IXGBE_STAT("rx_csum_offload_errors", hw_csum_rx_error),
+ IXGBE_STAT("alloc_rx_page_failed", alloc_rx_page_failed),
+ IXGBE_STAT("alloc_rx_buff_failed", alloc_rx_buff_failed),
+#ifndef IXGBE_NO_LRO
+ IXGBE_STAT("lro_aggregated", lro_stats.coal),
+ IXGBE_STAT("lro_flushed", lro_stats.flushed),
+#endif /* IXGBE_NO_LRO */
+ IXGBE_STAT("rx_no_dma_resources", hw_rx_no_dma_resources),
+ IXGBE_STAT("hw_rsc_aggregated", rsc_total_count),
+ IXGBE_STAT("hw_rsc_flushed", rsc_total_flush),
+#ifdef HAVE_TX_MQ
+ IXGBE_STAT("fdir_match", stats.fdirmatch),
+ IXGBE_STAT("fdir_miss", stats.fdirmiss),
+ IXGBE_STAT("fdir_overflow", fdir_overflow),
+#endif /* HAVE_TX_MQ */
+#ifdef IXGBE_FCOE
+ IXGBE_STAT("fcoe_bad_fccrc", stats.fccrc),
+ IXGBE_STAT("fcoe_last_errors", stats.fclast),
+ IXGBE_STAT("rx_fcoe_dropped", stats.fcoerpdc),
+ IXGBE_STAT("rx_fcoe_packets", stats.fcoeprc),
+ IXGBE_STAT("rx_fcoe_dwords", stats.fcoedwrc),
+ IXGBE_STAT("fcoe_noddp", stats.fcoe_noddp),
+ IXGBE_STAT("fcoe_noddp_ext_buff", stats.fcoe_noddp_ext_buff),
+ IXGBE_STAT("tx_fcoe_packets", stats.fcoeptc),
+ IXGBE_STAT("tx_fcoe_dwords", stats.fcoedwtc),
+#endif /* IXGBE_FCOE */
+ IXGBE_STAT("os2bmc_rx_by_bmc", stats.o2bgptc),
+ IXGBE_STAT("os2bmc_tx_by_bmc", stats.b2ospc),
+ IXGBE_STAT("os2bmc_tx_by_host", stats.o2bspc),
+ IXGBE_STAT("os2bmc_rx_by_host", stats.b2ogprc),
+};
+
+#define IXGBE_QUEUE_STATS_LEN \
+ ((((struct ixgbe_adapter *)netdev_priv(netdev))->num_tx_queues + \
+ ((struct ixgbe_adapter *)netdev_priv(netdev))->num_rx_queues) * \
+ (sizeof(struct ixgbe_queue_stats) / sizeof(u64)))
+#define IXGBE_GLOBAL_STATS_LEN ARRAY_SIZE(ixgbe_gstrings_stats)
+#define IXGBE_NETDEV_STATS_LEN ARRAY_SIZE(ixgbe_gstrings_net_stats)
+#define IXGBE_PB_STATS_LEN ( \
+ (((struct ixgbe_adapter *)netdev_priv(netdev))->flags & \
+ IXGBE_FLAG_DCB_ENABLED) ? \
+ (sizeof(((struct ixgbe_adapter *)0)->stats.pxonrxc) + \
+ sizeof(((struct ixgbe_adapter *)0)->stats.pxontxc) + \
+ sizeof(((struct ixgbe_adapter *)0)->stats.pxoffrxc) + \
+ sizeof(((struct ixgbe_adapter *)0)->stats.pxofftxc)) \
+ / sizeof(u64) : 0)
+#define IXGBE_VF_STATS_LEN \
+ ((((struct ixgbe_adapter *)netdev_priv(netdev))->num_vfs) * \
+ (sizeof(struct vf_stats) / sizeof(u64)))
+#define IXGBE_STATS_LEN (IXGBE_GLOBAL_STATS_LEN + \
+ IXGBE_NETDEV_STATS_LEN + \
+ IXGBE_PB_STATS_LEN + \
+ IXGBE_QUEUE_STATS_LEN + \
+ IXGBE_VF_STATS_LEN)
+
+#endif /* ETHTOOL_GSTATS */
+#ifdef ETHTOOL_TEST
+static const char ixgbe_gstrings_test[][ETH_GSTRING_LEN] = {
+ "Register test (offline)", "Eeprom test (offline)",
+ "Interrupt test (offline)", "Loopback test (offline)",
+ "Link test (on/offline)"
+};
+#define IXGBE_TEST_LEN (sizeof(ixgbe_gstrings_test) / ETH_GSTRING_LEN)
+#endif /* ETHTOOL_TEST */
+
+int ixgbe_get_settings(struct net_device *netdev,
+ struct ethtool_cmd *ecmd)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_hw *hw = &adapter->hw;
+ u32 link_speed = 0;
+ bool link_up;
+
+ ecmd->supported = SUPPORTED_10000baseT_Full;
+ ecmd->autoneg = AUTONEG_ENABLE;
+ ecmd->transceiver = XCVR_EXTERNAL;
+ if ((hw->phy.media_type == ixgbe_media_type_copper) ||
+ (hw->phy.multispeed_fiber)) {
+ ecmd->supported |= (SUPPORTED_1000baseT_Full |
+ SUPPORTED_Autoneg);
+ switch (hw->mac.type) {
+ case ixgbe_mac_X540:
+ ecmd->supported |= SUPPORTED_100baseT_Full;
+ break;
+ default:
+ break;
+ }
+
+ ecmd->advertising = ADVERTISED_Autoneg;
+ if (hw->phy.autoneg_advertised) {
+ if (hw->phy.autoneg_advertised &
+ IXGBE_LINK_SPEED_100_FULL)
+ ecmd->advertising |= ADVERTISED_100baseT_Full;
+ if (hw->phy.autoneg_advertised &
+ IXGBE_LINK_SPEED_10GB_FULL)
+ ecmd->advertising |= ADVERTISED_10000baseT_Full;
+ if (hw->phy.autoneg_advertised &
+ IXGBE_LINK_SPEED_1GB_FULL)
+ ecmd->advertising |= ADVERTISED_1000baseT_Full;
+ } else {
+ /*
+ * Default advertised modes in case
+ * phy.autoneg_advertised isn't set.
+ */
+ ecmd->advertising |= (ADVERTISED_10000baseT_Full |
+ ADVERTISED_1000baseT_Full);
+ if (hw->mac.type == ixgbe_mac_X540)
+ ecmd->advertising |= ADVERTISED_100baseT_Full;
+ }
+
+ if (hw->phy.media_type == ixgbe_media_type_copper) {
+ ecmd->supported |= SUPPORTED_TP;
+ ecmd->advertising |= ADVERTISED_TP;
+ ecmd->port = PORT_TP;
+ } else {
+ ecmd->supported |= SUPPORTED_FIBRE;
+ ecmd->advertising |= ADVERTISED_FIBRE;
+ ecmd->port = PORT_FIBRE;
+ }
+ } else if (hw->phy.media_type == ixgbe_media_type_backplane) {
+ /* Set as FIBRE until SERDES defined in kernel */
+ if (hw->device_id == IXGBE_DEV_ID_82598_BX) {
+ ecmd->supported = (SUPPORTED_1000baseT_Full |
+ SUPPORTED_FIBRE);
+ ecmd->advertising = (ADVERTISED_1000baseT_Full |
+ ADVERTISED_FIBRE);
+ ecmd->port = PORT_FIBRE;
+ ecmd->autoneg = AUTONEG_DISABLE;
+ } else if ((hw->device_id == IXGBE_DEV_ID_82599_COMBO_BACKPLANE)
+ || (hw->device_id == IXGBE_DEV_ID_82599_KX4_MEZZ)) {
+ ecmd->supported |= (SUPPORTED_1000baseT_Full |
+ SUPPORTED_Autoneg |
+ SUPPORTED_FIBRE);
+ ecmd->advertising = (ADVERTISED_10000baseT_Full |
+ ADVERTISED_1000baseT_Full |
+ ADVERTISED_Autoneg |
+ ADVERTISED_FIBRE);
+ ecmd->port = PORT_FIBRE;
+ } else {
+ ecmd->supported |= (SUPPORTED_1000baseT_Full |
+ SUPPORTED_FIBRE);
+ ecmd->advertising = (ADVERTISED_10000baseT_Full |
+ ADVERTISED_1000baseT_Full |
+ ADVERTISED_FIBRE);
+ ecmd->port = PORT_FIBRE;
+ }
+ } else {
+ ecmd->supported |= SUPPORTED_FIBRE;
+ ecmd->advertising = (ADVERTISED_10000baseT_Full |
+ ADVERTISED_FIBRE);
+ ecmd->port = PORT_FIBRE;
+ ecmd->autoneg = AUTONEG_DISABLE;
+ }
+
+#ifdef HAVE_ETHTOOL_SFP_DISPLAY_PORT
+ /* Get PHY type */
+ switch (adapter->hw.phy.type) {
+ case ixgbe_phy_tn:
+ case ixgbe_phy_aq:
+ case ixgbe_phy_cu_unknown:
+ /* Copper 10G-BASET */
+ ecmd->port = PORT_TP;
+ break;
+ case ixgbe_phy_qt:
+ ecmd->port = PORT_FIBRE;
+ break;
+ case ixgbe_phy_nl:
+ case ixgbe_phy_sfp_passive_tyco:
+ case ixgbe_phy_sfp_passive_unknown:
+ case ixgbe_phy_sfp_ftl:
+ case ixgbe_phy_sfp_avago:
+ case ixgbe_phy_sfp_intel:
+ case ixgbe_phy_sfp_unknown:
+ switch (adapter->hw.phy.sfp_type) {
+ /* SFP+ devices, further checking needed */
+ case ixgbe_sfp_type_da_cu:
+ case ixgbe_sfp_type_da_cu_core0:
+ case ixgbe_sfp_type_da_cu_core1:
+ ecmd->port = PORT_DA;
+ break;
+ case ixgbe_sfp_type_sr:
+ case ixgbe_sfp_type_lr:
+ case ixgbe_sfp_type_srlr_core0:
+ case ixgbe_sfp_type_srlr_core1:
+ ecmd->port = PORT_FIBRE;
+ break;
+ case ixgbe_sfp_type_not_present:
+ ecmd->port = PORT_NONE;
+ break;
+ case ixgbe_sfp_type_1g_cu_core0:
+ case ixgbe_sfp_type_1g_cu_core1:
+ ecmd->port = PORT_TP;
+ ecmd->supported = SUPPORTED_TP;
+ ecmd->advertising = (ADVERTISED_1000baseT_Full |
+ ADVERTISED_TP);
+ break;
+ case ixgbe_sfp_type_1g_sx_core0:
+ case ixgbe_sfp_type_1g_sx_core1:
+ ecmd->port = PORT_FIBRE;
+ ecmd->supported = SUPPORTED_FIBRE;
+ ecmd->advertising = (ADVERTISED_1000baseT_Full |
+ ADVERTISED_FIBRE);
+ break;
+ case ixgbe_sfp_type_unknown:
+ default:
+ ecmd->port = PORT_OTHER;
+ break;
+ }
+ break;
+ case ixgbe_phy_xaui:
+ ecmd->port = PORT_NONE;
+ break;
+ case ixgbe_phy_unknown:
+ case ixgbe_phy_generic:
+ case ixgbe_phy_sfp_unsupported:
+ default:
+ ecmd->port = PORT_OTHER;
+ break;
+ }
+#endif
+
+ if (!in_interrupt()) {
+ hw->mac.ops.check_link(hw, &link_speed, &link_up, false);
+ } else {
+ /*
+ * this case is a special workaround for RHEL5 bonding
+ * that calls this routine from interrupt context
+ */
+ link_speed = adapter->link_speed;
+ link_up = adapter->link_up;
+ }
+
+ if (link_up) {
+ switch (link_speed) {
+ case IXGBE_LINK_SPEED_10GB_FULL:
+ ecmd->speed = SPEED_10000;
+ break;
+ case IXGBE_LINK_SPEED_1GB_FULL:
+ ecmd->speed = SPEED_1000;
+ break;
+ case IXGBE_LINK_SPEED_100_FULL:
+ ecmd->speed = SPEED_100;
+ break;
+ default:
+ break;
+ }
+ ecmd->duplex = DUPLEX_FULL;
+ } else {
+ ecmd->speed = -1;
+ ecmd->duplex = -1;
+ }
+
+ return 0;
+}
+
+static int ixgbe_set_settings(struct net_device *netdev,
+ struct ethtool_cmd *ecmd)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_hw *hw = &adapter->hw;
+ u32 advertised, old;
+ s32 err = 0;
+
+ if ((hw->phy.media_type == ixgbe_media_type_copper) ||
+ (hw->phy.multispeed_fiber)) {
+ /*
+ * this function does not support duplex forcing, but can
+ * limit the advertising of the adapter to the specified speed
+ */
+ if (ecmd->autoneg == AUTONEG_DISABLE)
+ return -EINVAL;
+
+ if (ecmd->advertising & ~ecmd->supported)
+ return -EINVAL;
+
+ old = hw->phy.autoneg_advertised;
+ advertised = 0;
+ if (ecmd->advertising & ADVERTISED_10000baseT_Full)
+ advertised |= IXGBE_LINK_SPEED_10GB_FULL;
+
+ if (ecmd->advertising & ADVERTISED_1000baseT_Full)
+ advertised |= IXGBE_LINK_SPEED_1GB_FULL;
+
+ if (ecmd->advertising & ADVERTISED_100baseT_Full)
+ advertised |= IXGBE_LINK_SPEED_100_FULL;
+
+ if (old == advertised)
+ return err;
+ /* this sets the link speed and restarts auto-neg */
+ hw->mac.autotry_restart = true;
+ err = hw->mac.ops.setup_link(hw, advertised, true, true);
+ if (err) {
+ e_info(probe, "setup link failed with code %d\n", err);
+ hw->mac.ops.setup_link(hw, old, true, true);
+ }
+ }
+ return err;
+}
+
+static void ixgbe_get_pauseparam(struct net_device *netdev,
+ struct ethtool_pauseparam *pause)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_hw *hw = &adapter->hw;
+
+ if (hw->fc.disable_fc_autoneg)
+ pause->autoneg = 0;
+ else
+ pause->autoneg = 1;
+
+ if (hw->fc.current_mode == ixgbe_fc_rx_pause) {
+ pause->rx_pause = 1;
+ } else if (hw->fc.current_mode == ixgbe_fc_tx_pause) {
+ pause->tx_pause = 1;
+ } else if (hw->fc.current_mode == ixgbe_fc_full) {
+ pause->rx_pause = 1;
+ pause->tx_pause = 1;
+ }
+}
+
+static int ixgbe_set_pauseparam(struct net_device *netdev,
+ struct ethtool_pauseparam *pause)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_hw *hw = &adapter->hw;
+ struct ixgbe_fc_info fc = hw->fc;
+
+ /* 82598 does no support link flow control with DCB enabled */
+ if ((hw->mac.type == ixgbe_mac_82598EB) &&
+ (adapter->flags & IXGBE_FLAG_DCB_ENABLED))
+ return -EINVAL;
+
+ fc.disable_fc_autoneg = (pause->autoneg != AUTONEG_ENABLE);
+
+ if ((pause->rx_pause && pause->tx_pause) || pause->autoneg)
+ fc.requested_mode = ixgbe_fc_full;
+ else if (pause->rx_pause)
+ fc.requested_mode = ixgbe_fc_rx_pause;
+ else if (pause->tx_pause)
+ fc.requested_mode = ixgbe_fc_tx_pause;
+ else
+ fc.requested_mode = ixgbe_fc_none;
+
+ /* if the thing changed then we'll update and use new autoneg */
+ if (memcmp(&fc, &hw->fc, sizeof(struct ixgbe_fc_info))) {
+ hw->fc = fc;
+ if (netif_running(netdev))
+ ixgbe_reinit_locked(adapter);
+ else
+ ixgbe_reset(adapter);
+ }
+
+ return 0;
+}
+
+static u32 ixgbe_get_msglevel(struct net_device *netdev)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ return adapter->msg_enable;
+}
+
+static void ixgbe_set_msglevel(struct net_device *netdev, u32 data)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ adapter->msg_enable = data;
+}
+
+static int ixgbe_get_regs_len(struct net_device *netdev)
+{
+#define IXGBE_REGS_LEN 1129
+ return IXGBE_REGS_LEN * sizeof(u32);
+}
+
+#define IXGBE_GET_STAT(_A_, _R_) (_A_->stats._R_)
+
+
+static void ixgbe_get_regs(struct net_device *netdev, struct ethtool_regs *regs,
+ void *p)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_hw *hw = &adapter->hw;
+ u32 *regs_buff = p;
+ u8 i;
+
+ printk(KERN_DEBUG "ixgbe_get_regs_1\n");
+ memset(p, 0, IXGBE_REGS_LEN * sizeof(u32));
+ printk(KERN_DEBUG "ixgbe_get_regs_2 0x%p\n", hw->hw_addr);
+
+ regs->version = (1 << 24) | hw->revision_id << 16 | hw->device_id;
+
+ /* General Registers */
+ regs_buff[0] = IXGBE_READ_REG(hw, IXGBE_CTRL);
+ printk(KERN_DEBUG "ixgbe_get_regs_3\n");
+ regs_buff[1] = IXGBE_READ_REG(hw, IXGBE_STATUS);
+ regs_buff[2] = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
+ regs_buff[3] = IXGBE_READ_REG(hw, IXGBE_ESDP);
+ regs_buff[4] = IXGBE_READ_REG(hw, IXGBE_EODSDP);
+ regs_buff[5] = IXGBE_READ_REG(hw, IXGBE_LEDCTL);
+ regs_buff[6] = IXGBE_READ_REG(hw, IXGBE_FRTIMER);
+ regs_buff[7] = IXGBE_READ_REG(hw, IXGBE_TCPTIMER);
+
+ printk(KERN_DEBUG "ixgbe_get_regs_4\n");
+
+ /* NVM Register */
+ regs_buff[8] = IXGBE_READ_REG(hw, IXGBE_EEC);
+ regs_buff[9] = IXGBE_READ_REG(hw, IXGBE_EERD);
+ regs_buff[10] = IXGBE_READ_REG(hw, IXGBE_FLA);
+ regs_buff[11] = IXGBE_READ_REG(hw, IXGBE_EEMNGCTL);
+ regs_buff[12] = IXGBE_READ_REG(hw, IXGBE_EEMNGDATA);
+ regs_buff[13] = IXGBE_READ_REG(hw, IXGBE_FLMNGCTL);
+ regs_buff[14] = IXGBE_READ_REG(hw, IXGBE_FLMNGDATA);
+ regs_buff[15] = IXGBE_READ_REG(hw, IXGBE_FLMNGCNT);
+ regs_buff[16] = IXGBE_READ_REG(hw, IXGBE_FLOP);
+ regs_buff[17] = IXGBE_READ_REG(hw, IXGBE_GRC);
+
+ /* Interrupt */
+ /* don't read EICR because it can clear interrupt causes, instead
+ * read EICS which is a shadow but doesn't clear EICR */
+ regs_buff[18] = IXGBE_READ_REG(hw, IXGBE_EICS);
+ regs_buff[19] = IXGBE_READ_REG(hw, IXGBE_EICS);
+ regs_buff[20] = IXGBE_READ_REG(hw, IXGBE_EIMS);
+ regs_buff[21] = IXGBE_READ_REG(hw, IXGBE_EIMC);
+ regs_buff[22] = IXGBE_READ_REG(hw, IXGBE_EIAC);
+ regs_buff[23] = IXGBE_READ_REG(hw, IXGBE_EIAM);
+ regs_buff[24] = IXGBE_READ_REG(hw, IXGBE_EITR(0));
+ regs_buff[25] = IXGBE_READ_REG(hw, IXGBE_IVAR(0));
+ regs_buff[26] = IXGBE_READ_REG(hw, IXGBE_MSIXT);
+ regs_buff[27] = IXGBE_READ_REG(hw, IXGBE_MSIXPBA);
+ regs_buff[28] = IXGBE_READ_REG(hw, IXGBE_PBACL(0));
+ regs_buff[29] = IXGBE_READ_REG(hw, IXGBE_GPIE);
+
+ /* Flow Control */
+ regs_buff[30] = IXGBE_READ_REG(hw, IXGBE_PFCTOP);
+ regs_buff[31] = IXGBE_READ_REG(hw, IXGBE_FCTTV(0));
+ regs_buff[32] = IXGBE_READ_REG(hw, IXGBE_FCTTV(1));
+ regs_buff[33] = IXGBE_READ_REG(hw, IXGBE_FCTTV(2));
+ regs_buff[34] = IXGBE_READ_REG(hw, IXGBE_FCTTV(3));
+ for (i = 0; i < 8; i++) {
+ switch (hw->mac.type) {
+ case ixgbe_mac_82598EB:
+ regs_buff[35 + i] = IXGBE_READ_REG(hw, IXGBE_FCRTL(i));
+ regs_buff[43 + i] = IXGBE_READ_REG(hw, IXGBE_FCRTH(i));
+ break;
+ case ixgbe_mac_82599EB:
+ case ixgbe_mac_X540:
+ regs_buff[35 + i] = IXGBE_READ_REG(hw,
+ IXGBE_FCRTL_82599(i));
+ regs_buff[43 + i] = IXGBE_READ_REG(hw,
+ IXGBE_FCRTH_82599(i));
+ break;
+ default:
+ break;
+ }
+ }
+ regs_buff[51] = IXGBE_READ_REG(hw, IXGBE_FCRTV);
+ regs_buff[52] = IXGBE_READ_REG(hw, IXGBE_TFCS);
+
+ /* Receive DMA */
+ for (i = 0; i < 64; i++)
+ regs_buff[53 + i] = IXGBE_READ_REG(hw, IXGBE_RDBAL(i));
+ for (i = 0; i < 64; i++)
+ regs_buff[117 + i] = IXGBE_READ_REG(hw, IXGBE_RDBAH(i));
+ for (i = 0; i < 64; i++)
+ regs_buff[181 + i] = IXGBE_READ_REG(hw, IXGBE_RDLEN(i));
+ for (i = 0; i < 64; i++)
+ regs_buff[245 + i] = IXGBE_READ_REG(hw, IXGBE_RDH(i));
+ for (i = 0; i < 64; i++)
+ regs_buff[309 + i] = IXGBE_READ_REG(hw, IXGBE_RDT(i));
+ for (i = 0; i < 64; i++)
+ regs_buff[373 + i] = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
+ for (i = 0; i < 16; i++)
+ regs_buff[437 + i] = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
+ for (i = 0; i < 16; i++)
+ regs_buff[453 + i] = IXGBE_READ_REG(hw, IXGBE_DCA_RXCTRL(i));
+ regs_buff[469] = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
+ for (i = 0; i < 8; i++)
+ regs_buff[470 + i] = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
+ regs_buff[478] = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
+ regs_buff[479] = IXGBE_READ_REG(hw, IXGBE_DROPEN);
+
+ /* Receive */
+ regs_buff[480] = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
+ regs_buff[481] = IXGBE_READ_REG(hw, IXGBE_RFCTL);
+ for (i = 0; i < 16; i++)
+ regs_buff[482 + i] = IXGBE_READ_REG(hw, IXGBE_RAL(i));
+ for (i = 0; i < 16; i++)
+ regs_buff[498 + i] = IXGBE_READ_REG(hw, IXGBE_RAH(i));
+ regs_buff[514] = IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0));
+ regs_buff[515] = IXGBE_READ_REG(hw, IXGBE_FCTRL);
+ regs_buff[516] = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
+ regs_buff[517] = IXGBE_READ_REG(hw, IXGBE_MCSTCTRL);
+ regs_buff[518] = IXGBE_READ_REG(hw, IXGBE_MRQC);
+ regs_buff[519] = IXGBE_READ_REG(hw, IXGBE_VMD_CTL);
+ for (i = 0; i < 8; i++)
+ regs_buff[520 + i] = IXGBE_READ_REG(hw, IXGBE_IMIR(i));
+ for (i = 0; i < 8; i++)
+ regs_buff[528 + i] = IXGBE_READ_REG(hw, IXGBE_IMIREXT(i));
+ regs_buff[536] = IXGBE_READ_REG(hw, IXGBE_IMIRVP);
+
+ /* Transmit */
+ for (i = 0; i < 32; i++)
+ regs_buff[537 + i] = IXGBE_READ_REG(hw, IXGBE_TDBAL(i));
+ for (i = 0; i < 32; i++)
+ regs_buff[569 + i] = IXGBE_READ_REG(hw, IXGBE_TDBAH(i));
+ for (i = 0; i < 32; i++)
+ regs_buff[601 + i] = IXGBE_READ_REG(hw, IXGBE_TDLEN(i));
+ for (i = 0; i < 32; i++)
+ regs_buff[633 + i] = IXGBE_READ_REG(hw, IXGBE_TDH(i));
+ for (i = 0; i < 32; i++)
+ regs_buff[665 + i] = IXGBE_READ_REG(hw, IXGBE_TDT(i));
+ for (i = 0; i < 32; i++)
+ regs_buff[697 + i] = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
+ for (i = 0; i < 32; i++)
+ regs_buff[729 + i] = IXGBE_READ_REG(hw, IXGBE_TDWBAL(i));
+ for (i = 0; i < 32; i++)
+ regs_buff[761 + i] = IXGBE_READ_REG(hw, IXGBE_TDWBAH(i));
+ regs_buff[793] = IXGBE_READ_REG(hw, IXGBE_DTXCTL);
+ for (i = 0; i < 16; i++)
+ regs_buff[794 + i] = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
+ regs_buff[810] = IXGBE_READ_REG(hw, IXGBE_TIPG);
+ for (i = 0; i < 8; i++)
+ regs_buff[811 + i] = IXGBE_READ_REG(hw, IXGBE_TXPBSIZE(i));
+ regs_buff[819] = IXGBE_READ_REG(hw, IXGBE_MNGTXMAP);
+
+ /* Wake Up */
+ regs_buff[820] = IXGBE_READ_REG(hw, IXGBE_WUC);
+ regs_buff[821] = IXGBE_READ_REG(hw, IXGBE_WUFC);
+ regs_buff[822] = IXGBE_READ_REG(hw, IXGBE_WUS);
+ regs_buff[823] = IXGBE_READ_REG(hw, IXGBE_IPAV);
+ regs_buff[824] = IXGBE_READ_REG(hw, IXGBE_IP4AT);
+ regs_buff[825] = IXGBE_READ_REG(hw, IXGBE_IP6AT);
+ regs_buff[826] = IXGBE_READ_REG(hw, IXGBE_WUPL);
+ regs_buff[827] = IXGBE_READ_REG(hw, IXGBE_WUPM);
+ regs_buff[828] = IXGBE_READ_REG(hw, IXGBE_FHFT(0));
+
+ /* DCB */
+ regs_buff[829] = IXGBE_READ_REG(hw, IXGBE_RMCS);
+ regs_buff[830] = IXGBE_READ_REG(hw, IXGBE_DPMCS);
+ regs_buff[831] = IXGBE_READ_REG(hw, IXGBE_PDPMCS);
+ regs_buff[832] = IXGBE_READ_REG(hw, IXGBE_RUPPBMR);
+ for (i = 0; i < 8; i++)
+ regs_buff[833 + i] = IXGBE_READ_REG(hw, IXGBE_RT2CR(i));
+ for (i = 0; i < 8; i++)
+ regs_buff[841 + i] = IXGBE_READ_REG(hw, IXGBE_RT2SR(i));
+ for (i = 0; i < 8; i++)
+ regs_buff[849 + i] = IXGBE_READ_REG(hw, IXGBE_TDTQ2TCCR(i));
+ for (i = 0; i < 8; i++)
+ regs_buff[857 + i] = IXGBE_READ_REG(hw, IXGBE_TDTQ2TCSR(i));
+ for (i = 0; i < 8; i++)
+ regs_buff[865 + i] = IXGBE_READ_REG(hw, IXGBE_TDPT2TCCR(i));
+ for (i = 0; i < 8; i++)
+ regs_buff[873 + i] = IXGBE_READ_REG(hw, IXGBE_TDPT2TCSR(i));
+
+ /* Statistics */
+ regs_buff[881] = IXGBE_GET_STAT(adapter, crcerrs);
+ regs_buff[882] = IXGBE_GET_STAT(adapter, illerrc);
+ regs_buff[883] = IXGBE_GET_STAT(adapter, errbc);
+ regs_buff[884] = IXGBE_GET_STAT(adapter, mspdc);
+ for (i = 0; i < 8; i++)
+ regs_buff[885 + i] = IXGBE_GET_STAT(adapter, mpc[i]);
+ regs_buff[893] = IXGBE_GET_STAT(adapter, mlfc);
+ regs_buff[894] = IXGBE_GET_STAT(adapter, mrfc);
+ regs_buff[895] = IXGBE_GET_STAT(adapter, rlec);
+ regs_buff[896] = IXGBE_GET_STAT(adapter, lxontxc);
+ regs_buff[897] = IXGBE_GET_STAT(adapter, lxonrxc);
+ regs_buff[898] = IXGBE_GET_STAT(adapter, lxofftxc);
+ regs_buff[899] = IXGBE_GET_STAT(adapter, lxoffrxc);
+ for (i = 0; i < 8; i++)
+ regs_buff[900 + i] = IXGBE_GET_STAT(adapter, pxontxc[i]);
+ for (i = 0; i < 8; i++)
+ regs_buff[908 + i] = IXGBE_GET_STAT(adapter, pxonrxc[i]);
+ for (i = 0; i < 8; i++)
+ regs_buff[916 + i] = IXGBE_GET_STAT(adapter, pxofftxc[i]);
+ for (i = 0; i < 8; i++)
+ regs_buff[924 + i] = IXGBE_GET_STAT(adapter, pxoffrxc[i]);
+ regs_buff[932] = IXGBE_GET_STAT(adapter, prc64);
+ regs_buff[933] = IXGBE_GET_STAT(adapter, prc127);
+ regs_buff[934] = IXGBE_GET_STAT(adapter, prc255);
+ regs_buff[935] = IXGBE_GET_STAT(adapter, prc511);
+ regs_buff[936] = IXGBE_GET_STAT(adapter, prc1023);
+ regs_buff[937] = IXGBE_GET_STAT(adapter, prc1522);
+ regs_buff[938] = IXGBE_GET_STAT(adapter, gprc);
+ regs_buff[939] = IXGBE_GET_STAT(adapter, bprc);
+ regs_buff[940] = IXGBE_GET_STAT(adapter, mprc);
+ regs_buff[941] = IXGBE_GET_STAT(adapter, gptc);
+ regs_buff[942] = IXGBE_GET_STAT(adapter, gorc);
+ regs_buff[944] = IXGBE_GET_STAT(adapter, gotc);
+ for (i = 0; i < 8; i++)
+ regs_buff[946 + i] = IXGBE_GET_STAT(adapter, rnbc[i]);
+ regs_buff[954] = IXGBE_GET_STAT(adapter, ruc);
+ regs_buff[955] = IXGBE_GET_STAT(adapter, rfc);
+ regs_buff[956] = IXGBE_GET_STAT(adapter, roc);
+ regs_buff[957] = IXGBE_GET_STAT(adapter, rjc);
+ regs_buff[958] = IXGBE_GET_STAT(adapter, mngprc);
+ regs_buff[959] = IXGBE_GET_STAT(adapter, mngpdc);
+ regs_buff[960] = IXGBE_GET_STAT(adapter, mngptc);
+ regs_buff[961] = IXGBE_GET_STAT(adapter, tor);
+ regs_buff[963] = IXGBE_GET_STAT(adapter, tpr);
+ regs_buff[964] = IXGBE_GET_STAT(adapter, tpt);
+ regs_buff[965] = IXGBE_GET_STAT(adapter, ptc64);
+ regs_buff[966] = IXGBE_GET_STAT(adapter, ptc127);
+ regs_buff[967] = IXGBE_GET_STAT(adapter, ptc255);
+ regs_buff[968] = IXGBE_GET_STAT(adapter, ptc511);
+ regs_buff[969] = IXGBE_GET_STAT(adapter, ptc1023);
+ regs_buff[970] = IXGBE_GET_STAT(adapter, ptc1522);
+ regs_buff[971] = IXGBE_GET_STAT(adapter, mptc);
+ regs_buff[972] = IXGBE_GET_STAT(adapter, bptc);
+ regs_buff[973] = IXGBE_GET_STAT(adapter, xec);
+ for (i = 0; i < 16; i++)
+ regs_buff[974 + i] = IXGBE_GET_STAT(adapter, qprc[i]);
+ for (i = 0; i < 16; i++)
+ regs_buff[990 + i] = IXGBE_GET_STAT(adapter, qptc[i]);
+ for (i = 0; i < 16; i++)
+ regs_buff[1006 + i] = IXGBE_GET_STAT(adapter, qbrc[i]);
+ for (i = 0; i < 16; i++)
+ regs_buff[1022 + i] = IXGBE_GET_STAT(adapter, qbtc[i]);
+
+ /* MAC */
+ regs_buff[1038] = IXGBE_READ_REG(hw, IXGBE_PCS1GCFIG);
+ regs_buff[1039] = IXGBE_READ_REG(hw, IXGBE_PCS1GLCTL);
+ regs_buff[1040] = IXGBE_READ_REG(hw, IXGBE_PCS1GLSTA);
+ regs_buff[1041] = IXGBE_READ_REG(hw, IXGBE_PCS1GDBG0);
+ regs_buff[1042] = IXGBE_READ_REG(hw, IXGBE_PCS1GDBG1);
+ regs_buff[1043] = IXGBE_READ_REG(hw, IXGBE_PCS1GANA);
+ regs_buff[1044] = IXGBE_READ_REG(hw, IXGBE_PCS1GANLP);
+ regs_buff[1045] = IXGBE_READ_REG(hw, IXGBE_PCS1GANNP);
+ regs_buff[1046] = IXGBE_READ_REG(hw, IXGBE_PCS1GANLPNP);
+ regs_buff[1047] = IXGBE_READ_REG(hw, IXGBE_HLREG0);
+ regs_buff[1048] = IXGBE_READ_REG(hw, IXGBE_HLREG1);
+ regs_buff[1049] = IXGBE_READ_REG(hw, IXGBE_PAP);
+ regs_buff[1050] = IXGBE_READ_REG(hw, IXGBE_MACA);
+ regs_buff[1051] = IXGBE_READ_REG(hw, IXGBE_APAE);
+ regs_buff[1052] = IXGBE_READ_REG(hw, IXGBE_ARD);
+ regs_buff[1053] = IXGBE_READ_REG(hw, IXGBE_AIS);
+ regs_buff[1054] = IXGBE_READ_REG(hw, IXGBE_MSCA);
+ regs_buff[1055] = IXGBE_READ_REG(hw, IXGBE_MSRWD);
+ regs_buff[1056] = IXGBE_READ_REG(hw, IXGBE_MLADD);
+ regs_buff[1057] = IXGBE_READ_REG(hw, IXGBE_MHADD);
+ regs_buff[1058] = IXGBE_READ_REG(hw, IXGBE_TREG);
+ regs_buff[1059] = IXGBE_READ_REG(hw, IXGBE_PCSS1);
+ regs_buff[1060] = IXGBE_READ_REG(hw, IXGBE_PCSS2);
+ regs_buff[1061] = IXGBE_READ_REG(hw, IXGBE_XPCSS);
+ regs_buff[1062] = IXGBE_READ_REG(hw, IXGBE_SERDESC);
+ regs_buff[1063] = IXGBE_READ_REG(hw, IXGBE_MACS);
+ regs_buff[1064] = IXGBE_READ_REG(hw, IXGBE_AUTOC);
+ regs_buff[1065] = IXGBE_READ_REG(hw, IXGBE_LINKS);
+ regs_buff[1066] = IXGBE_READ_REG(hw, IXGBE_AUTOC2);
+ regs_buff[1067] = IXGBE_READ_REG(hw, IXGBE_AUTOC3);
+ regs_buff[1068] = IXGBE_READ_REG(hw, IXGBE_ANLP1);
+ regs_buff[1069] = IXGBE_READ_REG(hw, IXGBE_ANLP2);
+ regs_buff[1070] = IXGBE_READ_REG(hw, IXGBE_ATLASCTL);
+
+ /* Diagnostic */
+ regs_buff[1071] = IXGBE_READ_REG(hw, IXGBE_RDSTATCTL);
+ for (i = 0; i < 8; i++)
+ regs_buff[1072 + i] = IXGBE_READ_REG(hw, IXGBE_RDSTAT(i));
+ regs_buff[1080] = IXGBE_READ_REG(hw, IXGBE_RDHMPN);
+ for (i = 0; i < 4; i++)
+ regs_buff[1081 + i] = IXGBE_READ_REG(hw, IXGBE_RIC_DW(i));
+ regs_buff[1085] = IXGBE_READ_REG(hw, IXGBE_RDPROBE);
+ regs_buff[1086] = IXGBE_READ_REG(hw, IXGBE_TDSTATCTL);
+ for (i = 0; i < 8; i++)
+ regs_buff[1087 + i] = IXGBE_READ_REG(hw, IXGBE_TDSTAT(i));
+ regs_buff[1095] = IXGBE_READ_REG(hw, IXGBE_TDHMPN);
+ for (i = 0; i < 4; i++)
+ regs_buff[1096 + i] = IXGBE_READ_REG(hw, IXGBE_TIC_DW(i));
+ regs_buff[1100] = IXGBE_READ_REG(hw, IXGBE_TDPROBE);
+ regs_buff[1101] = IXGBE_READ_REG(hw, IXGBE_TXBUFCTRL);
+ regs_buff[1102] = IXGBE_READ_REG(hw, IXGBE_TXBUFDATA0);
+ regs_buff[1103] = IXGBE_READ_REG(hw, IXGBE_TXBUFDATA1);
+ regs_buff[1104] = IXGBE_READ_REG(hw, IXGBE_TXBUFDATA2);
+ regs_buff[1105] = IXGBE_READ_REG(hw, IXGBE_TXBUFDATA3);
+ regs_buff[1106] = IXGBE_READ_REG(hw, IXGBE_RXBUFCTRL);
+ regs_buff[1107] = IXGBE_READ_REG(hw, IXGBE_RXBUFDATA0);
+ regs_buff[1108] = IXGBE_READ_REG(hw, IXGBE_RXBUFDATA1);
+ regs_buff[1109] = IXGBE_READ_REG(hw, IXGBE_RXBUFDATA2);
+ regs_buff[1110] = IXGBE_READ_REG(hw, IXGBE_RXBUFDATA3);
+ for (i = 0; i < 8; i++)
+ regs_buff[1111 + i] = IXGBE_READ_REG(hw, IXGBE_PCIE_DIAG(i));
+ regs_buff[1119] = IXGBE_READ_REG(hw, IXGBE_RFVAL);
+ regs_buff[1120] = IXGBE_READ_REG(hw, IXGBE_MDFTC1);
+ regs_buff[1121] = IXGBE_READ_REG(hw, IXGBE_MDFTC2);
+ regs_buff[1122] = IXGBE_READ_REG(hw, IXGBE_MDFTFIFO1);
+ regs_buff[1123] = IXGBE_READ_REG(hw, IXGBE_MDFTFIFO2);
+ regs_buff[1124] = IXGBE_READ_REG(hw, IXGBE_MDFTS);
+ regs_buff[1125] = IXGBE_READ_REG(hw, IXGBE_PCIEECCCTL);
+ regs_buff[1126] = IXGBE_READ_REG(hw, IXGBE_PBTXECC);
+ regs_buff[1127] = IXGBE_READ_REG(hw, IXGBE_PBRXECC);
+
+ /* 82599 X540 specific registers */
+ regs_buff[1128] = IXGBE_READ_REG(hw, IXGBE_MFLCN);
+}
+
+static int ixgbe_get_eeprom_len(struct net_device *netdev)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ return adapter->hw.eeprom.word_size * 2;
+}
+
+static int ixgbe_get_eeprom(struct net_device *netdev,
+ struct ethtool_eeprom *eeprom, u8 *bytes)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_hw *hw = &adapter->hw;
+ u16 *eeprom_buff;
+ int first_word, last_word, eeprom_len;
+ int ret_val = 0;
+ u16 i;
+
+ if (eeprom->len == 0)
+ return -EINVAL;
+
+ eeprom->magic = hw->vendor_id | (hw->device_id << 16);
+
+ first_word = eeprom->offset >> 1;
+ last_word = (eeprom->offset + eeprom->len - 1) >> 1;
+ eeprom_len = last_word - first_word + 1;
+
+ eeprom_buff = kmalloc(sizeof(u16) * eeprom_len, GFP_KERNEL);
+ if (!eeprom_buff)
+ return -ENOMEM;
+
+ ret_val = ixgbe_read_eeprom_buffer(hw, first_word, eeprom_len,
+ eeprom_buff);
+
+ /* Device's eeprom is always little-endian, word addressable */
+ for (i = 0; i < eeprom_len; i++)
+ le16_to_cpus(&eeprom_buff[i]);
+
+ memcpy(bytes, (u8 *)eeprom_buff + (eeprom->offset & 1), eeprom->len);
+ kfree(eeprom_buff);
+
+ return ret_val;
+}
+
+static int ixgbe_set_eeprom(struct net_device *netdev,
+ struct ethtool_eeprom *eeprom, u8 *bytes)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_hw *hw = &adapter->hw;
+ u16 *eeprom_buff;
+ void *ptr;
+ int max_len, first_word, last_word, ret_val = 0;
+ u16 i;
+
+ if (eeprom->len == 0)
+ return -EINVAL;
+
+ if (eeprom->magic != (hw->vendor_id | (hw->device_id << 16)))
+ return -EINVAL;
+
+ max_len = hw->eeprom.word_size * 2;
+
+ first_word = eeprom->offset >> 1;
+ last_word = (eeprom->offset + eeprom->len - 1) >> 1;
+ eeprom_buff = kmalloc(max_len, GFP_KERNEL);
+ if (!eeprom_buff)
+ return -ENOMEM;
+
+ ptr = eeprom_buff;
+
+ if (eeprom->offset & 1) {
+ /*
+ * need read/modify/write of first changed EEPROM word
+ * only the second byte of the word is being modified
+ */
+ ret_val = ixgbe_read_eeprom(hw, first_word, &eeprom_buff[0]);
+ if (ret_val)
+ goto err;
+
+ ptr++;
+ }
+ if (((eeprom->offset + eeprom->len) & 1) && (ret_val == 0)) {
+ /*
+ * need read/modify/write of last changed EEPROM word
+ * only the first byte of the word is being modified
+ */
+ ret_val = ixgbe_read_eeprom(hw, last_word,
+ &eeprom_buff[last_word - first_word]);
+ if (ret_val)
+ goto err;
+ }
+
+ /* Device's eeprom is always little-endian, word addressable */
+ for (i = 0; i < last_word - first_word + 1; i++)
+ le16_to_cpus(&eeprom_buff[i]);
+
+ memcpy(ptr, bytes, eeprom->len);
+
+ for (i = 0; i < last_word - first_word + 1; i++)
+ cpu_to_le16s(&eeprom_buff[i]);
+
+ ret_val = ixgbe_write_eeprom_buffer(hw, first_word,
+ last_word - first_word + 1,
+ eeprom_buff);
+
+ /* Update the checksum */
+ if (ret_val == 0)
+ ixgbe_update_eeprom_checksum(hw);
+
+err:
+ kfree(eeprom_buff);
+ return ret_val;
+}
+
+static void ixgbe_get_drvinfo(struct net_device *netdev,
+ struct ethtool_drvinfo *drvinfo)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+ strlcpy(drvinfo->driver, ixgbe_driver_name, sizeof(drvinfo->driver));
+
+ strlcpy(drvinfo->version, ixgbe_driver_version,
+ sizeof(drvinfo->version));
+
+ strlcpy(drvinfo->fw_version, adapter->eeprom_id,
+ sizeof(drvinfo->fw_version));
+
+ strlcpy(drvinfo->bus_info, pci_name(adapter->pdev),
+ sizeof(drvinfo->bus_info));
+
+ drvinfo->n_stats = IXGBE_STATS_LEN;
+ drvinfo->testinfo_len = IXGBE_TEST_LEN;
+ drvinfo->regdump_len = ixgbe_get_regs_len(netdev);
+}
+
+static void ixgbe_get_ringparam(struct net_device *netdev,
+ struct ethtool_ringparam *ring)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+ ring->rx_max_pending = IXGBE_MAX_RXD;
+ ring->tx_max_pending = IXGBE_MAX_TXD;
+ ring->rx_mini_max_pending = 0;
+ ring->rx_jumbo_max_pending = 0;
+ ring->rx_pending = adapter->rx_ring_count;
+ ring->tx_pending = adapter->tx_ring_count;
+ ring->rx_mini_pending = 0;
+ ring->rx_jumbo_pending = 0;
+}
+
+static int ixgbe_set_ringparam(struct net_device *netdev,
+ struct ethtool_ringparam *ring)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_ring *tx_ring = NULL, *rx_ring = NULL;
+ u32 new_rx_count, new_tx_count;
+ int i, err = 0;
+
+ if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending))
+ return -EINVAL;
+
+ new_tx_count = clamp_t(u32, ring->tx_pending,
+ IXGBE_MIN_TXD, IXGBE_MAX_TXD);
+ new_tx_count = ALIGN(new_tx_count, IXGBE_REQ_TX_DESCRIPTOR_MULTIPLE);
+
+ new_rx_count = clamp_t(u32, ring->rx_pending,
+ IXGBE_MIN_RXD, IXGBE_MAX_RXD);
+ new_rx_count = ALIGN(new_rx_count, IXGBE_REQ_RX_DESCRIPTOR_MULTIPLE);
+
+ /* if nothing to do return success */
+ if ((new_tx_count == adapter->tx_ring_count) &&
+ (new_rx_count == adapter->rx_ring_count))
+ return 0;
+
+ while (test_and_set_bit(__IXGBE_RESETTING, &adapter->state))
+ usleep_range(1000, 2000);
+
+ if (!netif_running(adapter->netdev)) {
+ for (i = 0; i < adapter->num_tx_queues; i++)
+ adapter->tx_ring[i]->count = new_tx_count;
+ for (i = 0; i < adapter->num_rx_queues; i++)
+ adapter->rx_ring[i]->count = new_rx_count;
+ adapter->tx_ring_count = new_tx_count;
+ adapter->rx_ring_count = new_rx_count;
+ goto clear_reset;
+ }
+
+ /* alloc updated Tx resources */
+ if (new_tx_count != adapter->tx_ring_count) {
+ tx_ring = vmalloc(adapter->num_tx_queues * sizeof(*tx_ring));
+ if (!tx_ring) {
+ err = -ENOMEM;
+ goto clear_reset;
+ }
+
+ for (i = 0; i < adapter->num_tx_queues; i++) {
+ /* clone ring and setup updated count */
+ tx_ring[i] = *adapter->tx_ring[i];
+ tx_ring[i].count = new_tx_count;
+ err = ixgbe_setup_tx_resources(&tx_ring[i]);
+ if (err) {
+ while (i) {
+ i--;
+ ixgbe_free_tx_resources(&tx_ring[i]);
+ }
+
+ vfree(tx_ring);
+ tx_ring = NULL;
+
+ goto clear_reset;
+ }
+ }
+ }
+
+ /* alloc updated Rx resources */
+ if (new_rx_count != adapter->rx_ring_count) {
+ rx_ring = vmalloc(adapter->num_rx_queues * sizeof(*rx_ring));
+ if (!rx_ring) {
+ err = -ENOMEM;
+ goto clear_reset;
+ }
+
+ for (i = 0; i < adapter->num_rx_queues; i++) {
+ /* clone ring and setup updated count */
+ rx_ring[i] = *adapter->rx_ring[i];
+ rx_ring[i].count = new_rx_count;
+ err = ixgbe_setup_rx_resources(&rx_ring[i]);
+ if (err) {
+ while (i) {
+ i--;
+ ixgbe_free_rx_resources(&rx_ring[i]);
+ }
+
+ vfree(rx_ring);
+ rx_ring = NULL;
+
+ goto clear_reset;
+ }
+ }
+ }
+
+ /* bring interface down to prepare for update */
+ ixgbe_down(adapter);
+
+ /* Tx */
+ if (tx_ring) {
+ for (i = 0; i < adapter->num_tx_queues; i++) {
+ ixgbe_free_tx_resources(adapter->tx_ring[i]);
+ *adapter->tx_ring[i] = tx_ring[i];
+ }
+ adapter->tx_ring_count = new_tx_count;
+
+ vfree(tx_ring);
+ tx_ring = NULL;
+ }
+
+ /* Rx */
+ if (rx_ring) {
+ for (i = 0; i < adapter->num_rx_queues; i++) {
+ ixgbe_free_rx_resources(adapter->rx_ring[i]);
+ *adapter->rx_ring[i] = rx_ring[i];
+ }
+ adapter->rx_ring_count = new_rx_count;
+
+ vfree(rx_ring);
+ rx_ring = NULL;
+ }
+
+ /* restore interface using new values */
+ ixgbe_up(adapter);
+
+clear_reset:
+ /* free Tx resources if Rx error is encountered */
+ if (tx_ring) {
+ for (i = 0; i < adapter->num_tx_queues; i++)
+ ixgbe_free_tx_resources(&tx_ring[i]);
+ vfree(tx_ring);
+ }
+
+ clear_bit(__IXGBE_RESETTING, &adapter->state);
+ return err;
+}
+
+#ifndef HAVE_ETHTOOL_GET_SSET_COUNT
+static int ixgbe_get_stats_count(struct net_device *netdev)
+{
+ return IXGBE_STATS_LEN;
+}
+
+#else /* HAVE_ETHTOOL_GET_SSET_COUNT */
+static int ixgbe_get_sset_count(struct net_device *netdev, int sset)
+{
+ switch (sset) {
+ case ETH_SS_TEST:
+ return IXGBE_TEST_LEN;
+ case ETH_SS_STATS:
+ return IXGBE_STATS_LEN;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+#endif /* HAVE_ETHTOOL_GET_SSET_COUNT */
+static void ixgbe_get_ethtool_stats(struct net_device *netdev,
+ struct ethtool_stats *stats, u64 *data)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(netdev);
+#ifdef HAVE_NETDEV_STATS_IN_NETDEV
+ struct net_device_stats *net_stats = &netdev->stats;
+#else
+ struct net_device_stats *net_stats = &adapter->net_stats;
+#endif
+ u64 *queue_stat;
+ int stat_count = sizeof(struct ixgbe_queue_stats) / sizeof(u64);
+ int i, j, k;
+ char *p;
+
+ printk(KERN_DEBUG "ixgbe_stats 0\n");
+ ixgbe_update_stats(adapter);
+ printk(KERN_DEBUG "ixgbe_stats 1\n");
+
+ for (i = 0; i < IXGBE_NETDEV_STATS_LEN; i++) {
+ p = (char *)net_stats + ixgbe_gstrings_net_stats[i].stat_offset;
+ data[i] = (ixgbe_gstrings_net_stats[i].sizeof_stat ==
+ sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
+ }
+ for (j = 0; j < IXGBE_GLOBAL_STATS_LEN; j++, i++) {
+ p = (char *)adapter + ixgbe_gstrings_stats[j].stat_offset;
+ data[i] = (ixgbe_gstrings_stats[j].sizeof_stat ==
+ sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
+ }
+ printk(KERN_DEBUG "ixgbe_stats 2\n");
+#ifdef NO_VNIC
+ for (j = 0; j < adapter->num_tx_queues; j++) {
+ queue_stat = (u64 *)&adapter->tx_ring[j]->stats;
+ for (k = 0; k < stat_count; k++)
+ data[i + k] = queue_stat[k];
+ i += k;
+ }
+ for (j = 0; j < adapter->num_rx_queues; j++) {
+ queue_stat = (u64 *)&adapter->rx_ring[j]->stats;
+ for (k = 0; k < stat_count; k++)
+ data[i + k] = queue_stat[k];
+ i += k;
+ }
+ printk(KERN_DEBUG "ixgbe_stats 3\n");
+#endif
+ if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) {
+ for (j = 0; j < MAX_TX_PACKET_BUFFERS; j++) {
+ data[i++] = adapter->stats.pxontxc[j];
+ data[i++] = adapter->stats.pxofftxc[j];
+ }
+ for (j = 0; j < MAX_RX_PACKET_BUFFERS; j++) {
+ data[i++] = adapter->stats.pxonrxc[j];
+ data[i++] = adapter->stats.pxoffrxc[j];
+ }
+ }
+ printk(KERN_DEBUG "ixgbe_stats 4\n");
+ stat_count = sizeof(struct vf_stats) / sizeof(u64);
+ for (j = 0; j < adapter->num_vfs; j++) {
+ queue_stat = (u64 *)&adapter->vfinfo[j].vfstats;
+ for (k = 0; k < stat_count; k++)
+ data[i + k] = queue_stat[k];
+ queue_stat = (u64 *)&adapter->vfinfo[j].saved_rst_vfstats;
+ for (k = 0; k < stat_count; k++)
+ data[i + k] += queue_stat[k];
+ i += k;
+ }
+}
+
+static void ixgbe_get_strings(struct net_device *netdev, u32 stringset,
+ u8 *data)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ char *p = (char *)data;
+ int i;
+
+ switch (stringset) {
+ case ETH_SS_TEST:
+ memcpy(data, *ixgbe_gstrings_test,
+ IXGBE_TEST_LEN * ETH_GSTRING_LEN);
+ break;
+ case ETH_SS_STATS:
+ for (i = 0; i < IXGBE_NETDEV_STATS_LEN; i++) {
+ memcpy(p, ixgbe_gstrings_net_stats[i].stat_string,
+ ETH_GSTRING_LEN);
+ p += ETH_GSTRING_LEN;
+ }
+ for (i = 0; i < IXGBE_GLOBAL_STATS_LEN; i++) {
+ memcpy(p, ixgbe_gstrings_stats[i].stat_string,
+ ETH_GSTRING_LEN);
+ p += ETH_GSTRING_LEN;
+ }
+ for (i = 0; i < adapter->num_tx_queues; i++) {
+ sprintf(p, "tx_queue_%u_packets", i);
+ p += ETH_GSTRING_LEN;
+ sprintf(p, "tx_queue_%u_bytes", i);
+ p += ETH_GSTRING_LEN;
+ }
+ for (i = 0; i < adapter->num_rx_queues; i++) {
+ sprintf(p, "rx_queue_%u_packets", i);
+ p += ETH_GSTRING_LEN;
+ sprintf(p, "rx_queue_%u_bytes", i);
+ p += ETH_GSTRING_LEN;
+ }
+ if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) {
+ for (i = 0; i < MAX_TX_PACKET_BUFFERS; i++) {
+ sprintf(p, "tx_pb_%u_pxon", i);
+ p += ETH_GSTRING_LEN;
+ sprintf(p, "tx_pb_%u_pxoff", i);
+ p += ETH_GSTRING_LEN;
+ }
+ for (i = 0; i < MAX_RX_PACKET_BUFFERS; i++) {
+ sprintf(p, "rx_pb_%u_pxon", i);
+ p += ETH_GSTRING_LEN;
+ sprintf(p, "rx_pb_%u_pxoff", i);
+ p += ETH_GSTRING_LEN;
+ }
+ }
+ for (i = 0; i < adapter->num_vfs; i++) {
+ sprintf(p, "VF %d Rx Packets", i);
+ p += ETH_GSTRING_LEN;
+ sprintf(p, "VF %d Rx Bytes", i);
+ p += ETH_GSTRING_LEN;
+ sprintf(p, "VF %d Tx Packets", i);
+ p += ETH_GSTRING_LEN;
+ sprintf(p, "VF %d Tx Bytes", i);
+ p += ETH_GSTRING_LEN;
+ sprintf(p, "VF %d MC Packets", i);
+ p += ETH_GSTRING_LEN;
+ }
+ /* BUG_ON(p - data != IXGBE_STATS_LEN * ETH_GSTRING_LEN); */
+ break;
+ }
+}
+
+static int ixgbe_link_test(struct ixgbe_adapter *adapter, u64 *data)
+{
+ struct ixgbe_hw *hw = &adapter->hw;
+ bool link_up;
+ u32 link_speed = 0;
+ *data = 0;
+
+ hw->mac.ops.check_link(hw, &link_speed, &link_up, true);
+ if (link_up)
+ return *data;
+ else
+ *data = 1;
+ return *data;
+}
+
+/* ethtool register test data */
+struct ixgbe_reg_test {
+ u16 reg;
+ u8 array_len;
+ u8 test_type;
+ u32 mask;
+ u32 write;
+};
+
+/* In the hardware, registers are laid out either singly, in arrays
+ * spaced 0x40 bytes apart, or in contiguous tables. We assume
+ * most tests take place on arrays or single registers (handled
+ * as a single-element array) and special-case the tables.
+ * Table tests are always pattern tests.
+ *
+ * We also make provision for some required setup steps by specifying
+ * registers to be written without any read-back testing.
+ */
+
+#define PATTERN_TEST 1
+#define SET_READ_TEST 2
+#define WRITE_NO_TEST 3
+#define TABLE32_TEST 4
+#define TABLE64_TEST_LO 5
+#define TABLE64_TEST_HI 6
+
+/* default 82599 register test */
+static struct ixgbe_reg_test reg_test_82599[] = {
+ { IXGBE_FCRTL_82599(0), 1, PATTERN_TEST, 0x8007FFF0, 0x8007FFF0 },
+ { IXGBE_FCRTH_82599(0), 1, PATTERN_TEST, 0x8007FFF0, 0x8007FFF0 },
+ { IXGBE_PFCTOP, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+ { IXGBE_VLNCTRL, 1, PATTERN_TEST, 0x00000000, 0x00000000 },
+ { IXGBE_RDBAL(0), 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFF80 },
+ { IXGBE_RDBAH(0), 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+ { IXGBE_RDLEN(0), 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF },
+ { IXGBE_RXDCTL(0), 4, WRITE_NO_TEST, 0, IXGBE_RXDCTL_ENABLE },
+ { IXGBE_RDT(0), 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+ { IXGBE_RXDCTL(0), 4, WRITE_NO_TEST, 0, 0 },
+ { IXGBE_FCRTH(0), 1, PATTERN_TEST, 0x8007FFF0, 0x8007FFF0 },
+ { IXGBE_FCTTV(0), 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+ { IXGBE_TDBAL(0), 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+ { IXGBE_TDBAH(0), 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+ { IXGBE_TDLEN(0), 4, PATTERN_TEST, 0x000FFF80, 0x000FFF80 },
+ { IXGBE_RXCTRL, 1, SET_READ_TEST, 0x00000001, 0x00000001 },
+ { IXGBE_RAL(0), 16, TABLE64_TEST_LO, 0xFFFFFFFF, 0xFFFFFFFF },
+ { IXGBE_RAL(0), 16, TABLE64_TEST_HI, 0x8001FFFF, 0x800CFFFF },
+ { IXGBE_MTA(0), 128, TABLE32_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+ { 0, 0, 0, 0 }
+};
+
+/* default 82598 register test */
+static struct ixgbe_reg_test reg_test_82598[] = {
+ { IXGBE_FCRTL(0), 1, PATTERN_TEST, 0x8007FFF0, 0x8007FFF0 },
+ { IXGBE_FCRTH(0), 1, PATTERN_TEST, 0x8007FFF0, 0x8007FFF0 },
+ { IXGBE_PFCTOP, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+ { IXGBE_VLNCTRL, 1, PATTERN_TEST, 0x00000000, 0x00000000 },
+ { IXGBE_RDBAL(0), 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+ { IXGBE_RDBAH(0), 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+ { IXGBE_RDLEN(0), 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF },
+ /* Enable all four RX queues before testing. */
+ { IXGBE_RXDCTL(0), 4, WRITE_NO_TEST, 0, IXGBE_RXDCTL_ENABLE },
+ /* RDH is read-only for 82598, only test RDT. */
+ { IXGBE_RDT(0), 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+ { IXGBE_RXDCTL(0), 4, WRITE_NO_TEST, 0, 0 },
+ { IXGBE_FCRTH(0), 1, PATTERN_TEST, 0x8007FFF0, 0x8007FFF0 },
+ { IXGBE_FCTTV(0), 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+ { IXGBE_TIPG, 1, PATTERN_TEST, 0x000000FF, 0x000000FF },
+ { IXGBE_TDBAL(0), 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+ { IXGBE_TDBAH(0), 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+ { IXGBE_TDLEN(0), 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF },
+ { IXGBE_RXCTRL, 1, SET_READ_TEST, 0x00000003, 0x00000003 },
+ { IXGBE_DTXCTL, 1, SET_READ_TEST, 0x00000005, 0x00000005 },
+ { IXGBE_RAL(0), 16, TABLE64_TEST_LO, 0xFFFFFFFF, 0xFFFFFFFF },
+ { IXGBE_RAL(0), 16, TABLE64_TEST_HI, 0x800CFFFF, 0x800CFFFF },
+ { IXGBE_MTA(0), 128, TABLE32_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+ { 0, 0, 0, 0 }
+};
+
+#define REG_PATTERN_TEST(R, M, W) \
+{ \
+ u32 pat, val, before; \
+ const u32 _test[] = {0x5A5A5A5A, 0xA5A5A5A5, 0x00000000, 0xFFFFFFFF}; \
+ for (pat = 0; pat < ARRAY_SIZE(_test); pat++) { \
+ before = readl(adapter->hw.hw_addr + R); \
+ writel((_test[pat] & W), (adapter->hw.hw_addr + R)); \
+ val = readl(adapter->hw.hw_addr + R); \
+ if (val != (_test[pat] & W & M)) { \
+ e_err(drv, "pattern test reg %04X failed: got " \
+ "0x%08X expected 0x%08X\n", \
+ R, val, (_test[pat] & W & M)); \
+ *data = R; \
+ writel(before, adapter->hw.hw_addr + R); \
+ return 1; \
+ } \
+ writel(before, adapter->hw.hw_addr + R); \
+ } \
+}
+
+#define REG_SET_AND_CHECK(R, M, W) \
+{ \
+ u32 val, before; \
+ before = readl(adapter->hw.hw_addr + R); \
+ writel((W & M), (adapter->hw.hw_addr + R)); \
+ val = readl(adapter->hw.hw_addr + R); \
+ if ((W & M) != (val & M)) { \
+ e_err(drv, "set/check reg %04X test failed: got 0x%08X " \
+ "expected 0x%08X\n", R, (val & M), (W & M)); \
+ *data = R; \
+ writel(before, (adapter->hw.hw_addr + R)); \
+ return 1; \
+ } \
+ writel(before, (adapter->hw.hw_addr + R)); \
+}
+
+static int ixgbe_reg_test(struct ixgbe_adapter *adapter, u64 *data)
+{
+ struct ixgbe_reg_test *test;
+ u32 value, status_before, status_after;
+ u32 i, toggle;
+
+ switch (adapter->hw.mac.type) {
+ case ixgbe_mac_82598EB:
+ toggle = 0x7FFFF3FF;
+ test = reg_test_82598;
+ break;
+ case ixgbe_mac_82599EB:
+ case ixgbe_mac_X540:
+ toggle = 0x7FFFF30F;
+ test = reg_test_82599;
+ break;
+ default:
+ *data = 1;
+ return 1;
+ break;
+ }
+
+ /*
+ * Because the status register is such a special case,
+ * we handle it separately from the rest of the register
+ * tests. Some bits are read-only, some toggle, and some
+ * are writeable on newer MACs.
+ */
+ status_before = IXGBE_READ_REG(&adapter->hw, IXGBE_STATUS);
+ value = (IXGBE_READ_REG(&adapter->hw, IXGBE_STATUS) & toggle);
+ IXGBE_WRITE_REG(&adapter->hw, IXGBE_STATUS, toggle);
+ status_after = IXGBE_READ_REG(&adapter->hw, IXGBE_STATUS) & toggle;
+ if (value != status_after) {
+ e_err(drv, "failed STATUS register test got: "
+ "0x%08X expected: 0x%08X\n", status_after, value);
+ *data = 1;
+ return 1;
+ }
+ /* restore previous status */
+ IXGBE_WRITE_REG(&adapter->hw, IXGBE_STATUS, status_before);
+
+ /*
+ * Perform the remainder of the register test, looping through
+ * the test table until we either fail or reach the null entry.
+ */
+ while (test->reg) {
+ for (i = 0; i < test->array_len; i++) {
+ switch (test->test_type) {
+ case PATTERN_TEST:
+ REG_PATTERN_TEST(test->reg + (i * 0x40),
+ test->mask,
+ test->write);
+ break;
+ case SET_READ_TEST:
+ REG_SET_AND_CHECK(test->reg + (i * 0x40),
+ test->mask,
+ test->write);
+ break;
+ case WRITE_NO_TEST:
+ writel(test->write,
+ (adapter->hw.hw_addr + test->reg)
+ + (i * 0x40));
+ break;
+ case TABLE32_TEST:
+ REG_PATTERN_TEST(test->reg + (i * 4),
+ test->mask,
+ test->write);
+ break;
+ case TABLE64_TEST_LO:
+ REG_PATTERN_TEST(test->reg + (i * 8),
+ test->mask,
+ test->write);
+ break;
+ case TABLE64_TEST_HI:
+ REG_PATTERN_TEST((test->reg + 4) + (i * 8),
+ test->mask,
+ test->write);
+ break;
+ }
+ }
+ test++;
+ }
+
+ *data = 0;
+ return 0;
+}
+
+static int ixgbe_eeprom_test(struct ixgbe_adapter *adapter, u64 *data)
+{
+ if (ixgbe_validate_eeprom_checksum(&adapter->hw, NULL))
+ *data = 1;
+ else
+ *data = 0;
+ return *data;
+}
+
+static irqreturn_t ixgbe_test_intr(int irq, void *data)
+{
+ struct net_device *netdev = (struct net_device *) data;
+ struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+ adapter->test_icr |= IXGBE_READ_REG(&adapter->hw, IXGBE_EICR);
+
+ return IRQ_HANDLED;
+}
+
+static int ixgbe_intr_test(struct ixgbe_adapter *adapter, u64 *data)
+{
+ struct net_device *netdev = adapter->netdev;
+ u32 mask, i = 0, shared_int = true;
+ u32 irq = adapter->pdev->irq;
+
+ *data = 0;
+
+ /* Hook up test interrupt handler just for this test */
+ if (adapter->msix_entries) {
+ /* NOTE: we don't test MSI-X interrupts here, yet */
+ return 0;
+ } else if (adapter->flags & IXGBE_FLAG_MSI_ENABLED) {
+ shared_int = false;
+ if (request_irq(irq, &ixgbe_test_intr, 0, netdev->name,
+ netdev)) {
+ *data = 1;
+ return -1;
+ }
+ } else if (!request_irq(irq, &ixgbe_test_intr, IRQF_PROBE_SHARED,
+ netdev->name, netdev)) {
+ shared_int = false;
+ } else if (request_irq(irq, &ixgbe_test_intr, IRQF_SHARED,
+ netdev->name, netdev)) {
+ *data = 1;
+ return -1;
+ }
+ e_info(hw, "testing %s interrupt\n",
+ (shared_int ? "shared" : "unshared"));
+
+ /* Disable all the interrupts */
+ IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFFFFFF);
+ IXGBE_WRITE_FLUSH(&adapter->hw);
+ usleep_range(10000, 20000);
+
+ /* Test each interrupt */
+ for (; i < 10; i++) {
+ /* Interrupt to test */
+ mask = 1 << i;
+
+ if (!shared_int) {
+ /*
+ * Disable the interrupts to be reported in
+ * the cause register and then force the same
+ * interrupt and see if one gets posted. If
+ * an interrupt was posted to the bus, the
+ * test failed.
+ */
+ adapter->test_icr = 0;
+ IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC,
+ ~mask & 0x00007FFF);
+ IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS,
+ ~mask & 0x00007FFF);
+ IXGBE_WRITE_FLUSH(&adapter->hw);
+ usleep_range(10000, 20000);
+
+ if (adapter->test_icr & mask) {
+ *data = 3;
+ break;
+ }
+ }
+
+ /*
+ * Enable the interrupt to be reported in the cause
+ * register and then force the same interrupt and see
+ * if one gets posted. If an interrupt was not posted
+ * to the bus, the test failed.
+ */
+ adapter->test_icr = 0;
+ IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, mask);
+ IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS, mask);
+ IXGBE_WRITE_FLUSH(&adapter->hw);
+ usleep_range(10000, 20000);
+
+ if (!(adapter->test_icr & mask)) {
+ *data = 4;
+ break;
+ }
+
+ if (!shared_int) {
+ /*
+ * Disable the other interrupts to be reported in
+ * the cause register and then force the other
+ * interrupts and see if any get posted. If
+ * an interrupt was posted to the bus, the
+ * test failed.
+ */
+ adapter->test_icr = 0;
+ IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC,
+ ~mask & 0x00007FFF);
+ IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS,
+ ~mask & 0x00007FFF);
+ IXGBE_WRITE_FLUSH(&adapter->hw);
+ usleep_range(10000, 20000);
+
+ if (adapter->test_icr) {
+ *data = 5;
+ break;
+ }
+ }
+ }
+
+ /* Disable all the interrupts */
+ IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFFFFFF);
+ IXGBE_WRITE_FLUSH(&adapter->hw);
+ usleep_range(10000, 20000);
+
+ /* Unhook test interrupt handler */
+ free_irq(irq, netdev);
+
+ return *data;
+}
+
+
+
+static int ixgbe_setup_loopback_test(struct ixgbe_adapter *adapter)
+{
+ struct ixgbe_hw *hw = &adapter->hw;
+ u32 reg_data;
+
+ /* X540 needs to set the MACC.FLU bit to force link up */
+ if (adapter->hw.mac.type == ixgbe_mac_X540) {
+ reg_data = IXGBE_READ_REG(hw, IXGBE_MACC);
+ reg_data |= IXGBE_MACC_FLU;
+ IXGBE_WRITE_REG(hw, IXGBE_MACC, reg_data);
+ }
+
+ /* right now we only support MAC loopback in the driver */
+ reg_data = IXGBE_READ_REG(hw, IXGBE_HLREG0);
+ /* Setup MAC loopback */
+ reg_data |= IXGBE_HLREG0_LPBK;
+ IXGBE_WRITE_REG(hw, IXGBE_HLREG0, reg_data);
+
+ reg_data = IXGBE_READ_REG(hw, IXGBE_FCTRL);
+ reg_data |= IXGBE_FCTRL_BAM | IXGBE_FCTRL_SBP | IXGBE_FCTRL_MPE;
+ IXGBE_WRITE_REG(hw, IXGBE_FCTRL, reg_data);
+
+ reg_data = IXGBE_READ_REG(hw, IXGBE_AUTOC);
+ reg_data &= ~IXGBE_AUTOC_LMS_MASK;
+ reg_data |= IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU;
+ IXGBE_WRITE_REG(hw, IXGBE_AUTOC, reg_data);
+ IXGBE_WRITE_FLUSH(hw);
+ usleep_range(10000, 20000);
+
+ /* Disable Atlas Tx lanes; re-enabled in reset path */
+ if (hw->mac.type == ixgbe_mac_82598EB) {
+ u8 atlas;
+
+ ixgbe_read_analog_reg8(hw, IXGBE_ATLAS_PDN_LPBK, &atlas);
+ atlas |= IXGBE_ATLAS_PDN_TX_REG_EN;
+ ixgbe_write_analog_reg8(hw, IXGBE_ATLAS_PDN_LPBK, atlas);
+
+ ixgbe_read_analog_reg8(hw, IXGBE_ATLAS_PDN_10G, &atlas);
+ atlas |= IXGBE_ATLAS_PDN_TX_10G_QL_ALL;
+ ixgbe_write_analog_reg8(hw, IXGBE_ATLAS_PDN_10G, atlas);
+
+ ixgbe_read_analog_reg8(hw, IXGBE_ATLAS_PDN_1G, &atlas);
+ atlas |= IXGBE_ATLAS_PDN_TX_1G_QL_ALL;
+ ixgbe_write_analog_reg8(hw, IXGBE_ATLAS_PDN_1G, atlas);
+
+ ixgbe_read_analog_reg8(hw, IXGBE_ATLAS_PDN_AN, &atlas);
+ atlas |= IXGBE_ATLAS_PDN_TX_AN_QL_ALL;
+ ixgbe_write_analog_reg8(hw, IXGBE_ATLAS_PDN_AN, atlas);
+ }
+
+ return 0;
+}
+
+static void ixgbe_loopback_cleanup(struct ixgbe_adapter *adapter)
+{
+ u32 reg_data;
+
+ reg_data = IXGBE_READ_REG(&adapter->hw, IXGBE_HLREG0);
+ reg_data &= ~IXGBE_HLREG0_LPBK;
+ IXGBE_WRITE_REG(&adapter->hw, IXGBE_HLREG0, reg_data);
+}
+
+
+
+
+
+
+static int ixgbe_loopback_test(struct ixgbe_adapter *adapter, u64 *data)
+{
+
+ //*data = ixgbe_setup_desc_rings(adapter);
+ //if (*data)
+ // goto out;
+ *data = ixgbe_setup_loopback_test(adapter);
+ if (*data)
+ goto err_loopback;
+ //*data = ixgbe_run_loopback_test(adapter);
+ ixgbe_loopback_cleanup(adapter);
+
+err_loopback:
+ //ixgbe_free_desc_rings(adapter);
+//out:
+ return *data;
+
+}
+
+#ifndef HAVE_ETHTOOL_GET_SSET_COUNT
+static int ixgbe_diag_test_count(struct net_device *netdev)
+{
+ return IXGBE_TEST_LEN;
+}
+
+#endif /* HAVE_ETHTOOL_GET_SSET_COUNT */
+static void ixgbe_diag_test(struct net_device *netdev,
+ struct ethtool_test *eth_test, u64 *data)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ bool if_running = netif_running(netdev);
+
+ set_bit(__IXGBE_TESTING, &adapter->state);
+ if (eth_test->flags == ETH_TEST_FL_OFFLINE) {
+ /* Offline tests */
+
+ e_info(hw, "offline testing starting\n");
+
+ /* Link test performed before hardware reset so autoneg doesn't
+ * interfere with test result */
+ if (ixgbe_link_test(adapter, &data[4]))
+ eth_test->flags |= ETH_TEST_FL_FAILED;
+
+ if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) {
+ int i;
+ for (i = 0; i < adapter->num_vfs; i++) {
+ if (adapter->vfinfo[i].clear_to_send) {
+ e_warn(drv, "Please take active VFS "
+ "offline and restart the "
+ "adapter before running NIC "
+ "diagnostics\n");
+ data[0] = 1;
+ data[1] = 1;
+ data[2] = 1;
+ data[3] = 1;
+ eth_test->flags |= ETH_TEST_FL_FAILED;
+ clear_bit(__IXGBE_TESTING,
+ &adapter->state);
+ goto skip_ol_tests;
+ }
+ }
+ }
+
+ if (if_running)
+ /* indicate we're in test mode */
+ dev_close(netdev);
+ else
+ ixgbe_reset(adapter);
+
+ e_info(hw, "register testing starting\n");
+ if (ixgbe_reg_test(adapter, &data[0]))
+ eth_test->flags |= ETH_TEST_FL_FAILED;
+
+ ixgbe_reset(adapter);
+ e_info(hw, "eeprom testing starting\n");
+ if (ixgbe_eeprom_test(adapter, &data[1]))
+ eth_test->flags |= ETH_TEST_FL_FAILED;
+
+ ixgbe_reset(adapter);
+ e_info(hw, "interrupt testing starting\n");
+ if (ixgbe_intr_test(adapter, &data[2]))
+ eth_test->flags |= ETH_TEST_FL_FAILED;
+
+ /* If SRIOV or VMDq is enabled then skip MAC
+ * loopback diagnostic. */
+ if (adapter->flags & (IXGBE_FLAG_SRIOV_ENABLED |
+ IXGBE_FLAG_VMDQ_ENABLED)) {
+ e_info(hw, "skip MAC loopback diagnostic in VT mode\n");
+ data[3] = 0;
+ goto skip_loopback;
+ }
+
+ ixgbe_reset(adapter);
+ e_info(hw, "loopback testing starting\n");
+ if (ixgbe_loopback_test(adapter, &data[3]))
+ eth_test->flags |= ETH_TEST_FL_FAILED;
+
+skip_loopback:
+ ixgbe_reset(adapter);
+
+ clear_bit(__IXGBE_TESTING, &adapter->state);
+ if (if_running)
+ dev_open(netdev);
+ } else {
+ e_info(hw, "online testing starting\n");
+ /* Online tests */
+ if (ixgbe_link_test(adapter, &data[4]))
+ eth_test->flags |= ETH_TEST_FL_FAILED;
+
+ /* Online tests aren't run; pass by default */
+ data[0] = 0;
+ data[1] = 0;
+ data[2] = 0;
+ data[3] = 0;
+
+ clear_bit(__IXGBE_TESTING, &adapter->state);
+ }
+skip_ol_tests:
+ msleep_interruptible(4 * 1000);
+}
+
+static int ixgbe_wol_exclusion(struct ixgbe_adapter *adapter,
+ struct ethtool_wolinfo *wol)
+{
+ struct ixgbe_hw *hw = &adapter->hw;
+ int retval = 1;
+ u16 wol_cap = adapter->eeprom_cap & IXGBE_DEVICE_CAPS_WOL_MASK;
+
+ /* WOL not supported except for the following */
+ switch (hw->device_id) {
+ case IXGBE_DEV_ID_82599_SFP:
+ /* Only these subdevice could supports WOL */
+ switch (hw->subsystem_device_id) {
+ case IXGBE_SUBDEV_ID_82599_560FLR:
+ /* only support first port */
+ if (hw->bus.func != 0) {
+ wol->supported = 0;
+ break;
+ }
+ case IXGBE_SUBDEV_ID_82599_SFP:
+ retval = 0;
+ break;
+ default:
+ wol->supported = 0;
+ break;
+ }
+ break;
+ case IXGBE_DEV_ID_82599_COMBO_BACKPLANE:
+ /* All except this subdevice support WOL */
+ if (hw->subsystem_device_id ==
+ IXGBE_SUBDEV_ID_82599_KX4_KR_MEZZ) {
+ wol->supported = 0;
+ break;
+ }
+ retval = 0;
+ break;
+ case IXGBE_DEV_ID_82599_KX4:
+ retval = 0;
+ break;
+ case IXGBE_DEV_ID_X540T:
+ /* check eeprom to see if enabled wol */
+ if ((wol_cap == IXGBE_DEVICE_CAPS_WOL_PORT0_1) ||
+ ((wol_cap == IXGBE_DEVICE_CAPS_WOL_PORT0) &&
+ (hw->bus.func == 0))) {
+ retval = 0;
+ break;
+ }
+
+ /* All others not supported */
+ wol->supported = 0;
+ break;
+ default:
+ wol->supported = 0;
+ }
+ return retval;
+}
+
+static void ixgbe_get_wol(struct net_device *netdev,
+ struct ethtool_wolinfo *wol)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+ wol->supported = WAKE_UCAST | WAKE_MCAST |
+ WAKE_BCAST | WAKE_MAGIC;
+ wol->wolopts = 0;
+
+ if (ixgbe_wol_exclusion(adapter, wol) ||
+ !device_can_wakeup(&adapter->pdev->dev))
+ return;
+
+ if (adapter->wol & IXGBE_WUFC_EX)
+ wol->wolopts |= WAKE_UCAST;
+ if (adapter->wol & IXGBE_WUFC_MC)
+ wol->wolopts |= WAKE_MCAST;
+ if (adapter->wol & IXGBE_WUFC_BC)
+ wol->wolopts |= WAKE_BCAST;
+ if (adapter->wol & IXGBE_WUFC_MAG)
+ wol->wolopts |= WAKE_MAGIC;
+}
+
+static int ixgbe_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+ if (wol->wolopts & (WAKE_PHY | WAKE_ARP | WAKE_MAGICSECURE))
+ return -EOPNOTSUPP;
+
+ if (ixgbe_wol_exclusion(adapter, wol))
+ return wol->wolopts ? -EOPNOTSUPP : 0;
+
+ adapter->wol = 0;
+
+ if (wol->wolopts & WAKE_UCAST)
+ adapter->wol |= IXGBE_WUFC_EX;
+ if (wol->wolopts & WAKE_MCAST)
+ adapter->wol |= IXGBE_WUFC_MC;
+ if (wol->wolopts & WAKE_BCAST)
+ adapter->wol |= IXGBE_WUFC_BC;
+ if (wol->wolopts & WAKE_MAGIC)
+ adapter->wol |= IXGBE_WUFC_MAG;
+
+ device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
+
+ return 0;
+}
+
+static int ixgbe_nway_reset(struct net_device *netdev)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+ if (netif_running(netdev))
+ ixgbe_reinit_locked(adapter);
+
+ return 0;
+}
+
+#ifdef HAVE_ETHTOOL_SET_PHYS_ID
+static int ixgbe_set_phys_id(struct net_device *netdev,
+ enum ethtool_phys_id_state state)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_hw *hw = &adapter->hw;
+
+ switch (state) {
+ case ETHTOOL_ID_ACTIVE:
+ adapter->led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL);
+ return 2;
+
+ case ETHTOOL_ID_ON:
+ hw->mac.ops.led_on(hw, IXGBE_LED_ON);
+ break;
+
+ case ETHTOOL_ID_OFF:
+ hw->mac.ops.led_off(hw, IXGBE_LED_ON);
+ break;
+
+ case ETHTOOL_ID_INACTIVE:
+ /* Restore LED settings */
+ IXGBE_WRITE_REG(&adapter->hw, IXGBE_LEDCTL, adapter->led_reg);
+ break;
+ }
+
+ return 0;
+}
+#else
+static int ixgbe_phys_id(struct net_device *netdev, u32 data)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_hw *hw = &adapter->hw;
+ u32 led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL);
+ u32 i;
+
+ if (!data || data > 300)
+ data = 300;
+
+ for (i = 0; i < (data * 1000); i += 400) {
+ ixgbe_led_on(hw, IXGBE_LED_ON);
+ msleep_interruptible(200);
+ ixgbe_led_off(hw, IXGBE_LED_ON);
+ msleep_interruptible(200);
+ }
+
+ /* Restore LED settings */
+ IXGBE_WRITE_REG(hw, IXGBE_LEDCTL, led_reg);
+
+ return 0;
+}
+#endif /* HAVE_ETHTOOL_SET_PHYS_ID */
+
+static int ixgbe_get_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *ec)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+ ec->tx_max_coalesced_frames_irq = adapter->tx_work_limit;
+#ifndef CONFIG_IXGBE_NAPI
+ ec->rx_max_coalesced_frames_irq = adapter->rx_work_limit;
+#endif /* CONFIG_IXGBE_NAPI */
+ /* only valid if in constant ITR mode */
+ if (adapter->rx_itr_setting <= 1)
+ ec->rx_coalesce_usecs = adapter->rx_itr_setting;
+ else
+ ec->rx_coalesce_usecs = adapter->rx_itr_setting >> 2;
+
+ /* if in mixed tx/rx queues per vector mode, report only rx settings */
+ if (adapter->q_vector[0]->tx.count && adapter->q_vector[0]->rx.count)
+ return 0;
+
+ /* only valid if in constant ITR mode */
+ if (adapter->tx_itr_setting <= 1)
+ ec->tx_coalesce_usecs = adapter->tx_itr_setting;
+ else
+ ec->tx_coalesce_usecs = adapter->tx_itr_setting >> 2;
+
+ return 0;
+}
+
+/*
+ * this function must be called before setting the new value of
+ * rx_itr_setting
+ */
+#ifdef NO_VNIC
+static bool ixgbe_update_rsc(struct ixgbe_adapter *adapter)
+{
+ struct net_device *netdev = adapter->netdev;
+
+ /* nothing to do if LRO or RSC are not enabled */
+ if (!(adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE) ||
+ !(netdev->features & NETIF_F_LRO))
+ return false;
+
+ /* check the feature flag value and enable RSC if necessary */
+ if (adapter->rx_itr_setting == 1 ||
+ adapter->rx_itr_setting > IXGBE_MIN_RSC_ITR) {
+ if (!(adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED)) {
+ adapter->flags2 |= IXGBE_FLAG2_RSC_ENABLED;
+ e_info(probe, "rx-usecs value high enough "
+ "to re-enable RSC\n");
+ return true;
+ }
+ /* if interrupt rate is too high then disable RSC */
+ } else if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) {
+ adapter->flags2 &= ~IXGBE_FLAG2_RSC_ENABLED;
+#ifdef IXGBE_NO_LRO
+ e_info(probe, "rx-usecs set too low, disabling RSC\n");
+#else
+ e_info(probe, "rx-usecs set too low, "
+ "falling back to software LRO\n");
+#endif
+ return true;
+ }
+ return false;
+}
+#endif
+
+static int ixgbe_set_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *ec)
+{
+#ifdef NO_VNIC
+ struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_q_vector *q_vector;
+ int i;
+ int num_vectors;
+ u16 tx_itr_param, rx_itr_param;
+ bool need_reset = false;
+
+ /* don't accept tx specific changes if we've got mixed RxTx vectors */
+ if (adapter->q_vector[0]->tx.count && adapter->q_vector[0]->rx.count
+ && ec->tx_coalesce_usecs)
+ return -EINVAL;
+
+ if (ec->tx_max_coalesced_frames_irq)
+ adapter->tx_work_limit = ec->tx_max_coalesced_frames_irq;
+
+#ifndef CONFIG_IXGBE_NAPI
+ if (ec->rx_max_coalesced_frames_irq)
+ adapter->rx_work_limit = ec->rx_max_coalesced_frames_irq;
+
+#endif
+ if ((ec->rx_coalesce_usecs > (IXGBE_MAX_EITR >> 2)) ||
+ (ec->tx_coalesce_usecs > (IXGBE_MAX_EITR >> 2)))
+ return -EINVAL;
+
+ if (ec->rx_coalesce_usecs > 1)
+ adapter->rx_itr_setting = ec->rx_coalesce_usecs << 2;
+ else
+ adapter->rx_itr_setting = ec->rx_coalesce_usecs;
+
+ if (adapter->rx_itr_setting == 1)
+ rx_itr_param = IXGBE_20K_ITR;
+ else
+ rx_itr_param = adapter->rx_itr_setting;
+
+ if (ec->tx_coalesce_usecs > 1)
+ adapter->tx_itr_setting = ec->tx_coalesce_usecs << 2;
+ else
+ adapter->tx_itr_setting = ec->tx_coalesce_usecs;
+
+ if (adapter->tx_itr_setting == 1)
+ tx_itr_param = IXGBE_10K_ITR;
+ else
+ tx_itr_param = adapter->tx_itr_setting;
+
+ /* check the old value and enable RSC if necessary */
+ need_reset = ixgbe_update_rsc(adapter);
+
+ if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED)
+ num_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
+ else
+ num_vectors = 1;
+
+ for (i = 0; i < num_vectors; i++) {
+ q_vector = adapter->q_vector[i];
+ q_vector->tx.work_limit = adapter->tx_work_limit;
+ q_vector->rx.work_limit = adapter->rx_work_limit;
+ if (q_vector->tx.count && !q_vector->rx.count)
+ /* tx only */
+ q_vector->itr = tx_itr_param;
+ else
+ /* rx only or mixed */
+ q_vector->itr = rx_itr_param;
+ ixgbe_write_eitr(q_vector);
+ }
+
+ /*
+ * do reset here at the end to make sure EITR==0 case is handled
+ * correctly w.r.t stopping tx, and changing TXDCTL.WTHRESH settings
+ * also locks in RSC enable/disable which requires reset
+ */
+ if (need_reset)
+ ixgbe_do_reset(netdev);
+#endif
+ return 0;
+}
+
+#ifndef HAVE_NDO_SET_FEATURES
+static u32 ixgbe_get_rx_csum(struct net_device *netdev)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_ring *ring = adapter->rx_ring[0];
+ return test_bit(__IXGBE_RX_CSUM_ENABLED, &ring->state);
+}
+
+static int ixgbe_set_rx_csum(struct net_device *netdev, u32 data)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ int i;
+
+ for (i = 0; i < adapter->num_rx_queues; i++) {
+ struct ixgbe_ring *ring = adapter->rx_ring[i];
+ if (data)
+ set_bit(__IXGBE_RX_CSUM_ENABLED, &ring->state);
+ else
+ clear_bit(__IXGBE_RX_CSUM_ENABLED, &ring->state);
+ }
+
+ /* LRO and RSC both depend on RX checksum to function */
+ if (!data && (netdev->features & NETIF_F_LRO)) {
+ netdev->features &= ~NETIF_F_LRO;
+
+ if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) {
+ adapter->flags2 &= ~IXGBE_FLAG2_RSC_ENABLED;
+ ixgbe_do_reset(netdev);
+ }
+ }
+
+ return 0;
+}
+
+static u32 ixgbe_get_tx_csum(struct net_device *netdev)
+{
+ return (netdev->features & NETIF_F_IP_CSUM) != 0;
+}
+
+static int ixgbe_set_tx_csum(struct net_device *netdev, u32 data)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ u32 feature_list;
+
+#ifdef NETIF_F_IPV6_CSUM
+ feature_list = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
+#else
+ feature_list = NETIF_F_IP_CSUM;
+#endif
+ switch (adapter->hw.mac.type) {
+ case ixgbe_mac_82599EB:
+ case ixgbe_mac_X540:
+ feature_list |= NETIF_F_SCTP_CSUM;
+ break;
+ default:
+ break;
+ }
+ if (data)
+ netdev->features |= feature_list;
+ else
+ netdev->features &= ~feature_list;
+
+ return 0;
+}
+
+#ifdef NETIF_F_TSO
+static int ixgbe_set_tso(struct net_device *netdev, u32 data)
+{
+ if (data) {
+ netdev->features |= NETIF_F_TSO;
+#ifdef NETIF_F_TSO6
+ netdev->features |= NETIF_F_TSO6;
+#endif
+ } else {
+#ifndef HAVE_NETDEV_VLAN_FEATURES
+#ifdef NETIF_F_HW_VLAN_TX
+ struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ /* disable TSO on all VLANs if they're present */
+ if (adapter->vlgrp) {
+ int i;
+ struct net_device *v_netdev;
+ for (i = 0; i < VLAN_N_VID; i++) {
+ v_netdev =
+ vlan_group_get_device(adapter->vlgrp, i);
+ if (v_netdev) {
+ v_netdev->features &= ~NETIF_F_TSO;
+#ifdef NETIF_F_TSO6
+ v_netdev->features &= ~NETIF_F_TSO6;
+#endif
+ vlan_group_set_device(adapter->vlgrp, i,
+ v_netdev);
+ }
+ }
+ }
+#endif
+#endif /* HAVE_NETDEV_VLAN_FEATURES */
+ netdev->features &= ~NETIF_F_TSO;
+#ifdef NETIF_F_TSO6
+ netdev->features &= ~NETIF_F_TSO6;
+#endif
+ }
+ return 0;
+}
+
+#endif /* NETIF_F_TSO */
+#ifdef ETHTOOL_GFLAGS
+static int ixgbe_set_flags(struct net_device *netdev, u32 data)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ u32 supported_flags = ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN;
+ u32 changed = netdev->features ^ data;
+ bool need_reset = false;
+ int rc;
+
+#ifndef HAVE_VLAN_RX_REGISTER
+ if ((adapter->flags & IXGBE_FLAG_DCB_ENABLED) &&
+ !(data & ETH_FLAG_RXVLAN))
+ return -EINVAL;
+
+#endif
+#ifdef NETIF_F_RXHASH
+ if (adapter->flags & IXGBE_FLAG_RSS_ENABLED)
+ supported_flags |= ETH_FLAG_RXHASH;
+#endif
+#ifdef IXGBE_NO_LRO
+ if (adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE)
+#endif
+ supported_flags |= ETH_FLAG_LRO;
+
+#ifdef ETHTOOL_GRXRINGS
+ switch (adapter->hw.mac.type) {
+ case ixgbe_mac_X540:
+ case ixgbe_mac_82599EB:
+ supported_flags |= ETH_FLAG_NTUPLE;
+ default:
+ break;
+ }
+
+#endif
+ rc = ethtool_op_set_flags(netdev, data, supported_flags);
+ if (rc)
+ return rc;
+
+#ifndef HAVE_VLAN_RX_REGISTER
+ if (changed & ETH_FLAG_RXVLAN)
+ ixgbe_vlan_mode(netdev, netdev->features);
+
+#endif
+ /* if state changes we need to update adapter->flags and reset */
+ if (!(netdev->features & NETIF_F_LRO)) {
+ if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED)
+ need_reset = true;
+ adapter->flags2 &= ~IXGBE_FLAG2_RSC_ENABLED;
+ } else if ((adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE) &&
+ !(adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED)) {
+ if (adapter->rx_itr_setting == 1 ||
+ adapter->rx_itr_setting > IXGBE_MIN_RSC_ITR) {
+ adapter->flags2 |= IXGBE_FLAG2_RSC_ENABLED;
+ need_reset = true;
+ } else if (changed & ETH_FLAG_LRO) {
+#ifdef IXGBE_NO_LRO
+ e_info(probe, "rx-usecs set too low, "
+ "disabling RSC\n");
+#else
+ e_info(probe, "rx-usecs set too low, "
+ "falling back to software LRO\n");
+#endif
+ }
+ }
+
+#ifdef ETHTOOL_GRXRINGS
+ /*
+ * Check if Flow Director n-tuple support was enabled or disabled. If
+ * the state changed, we need to reset.
+ */
+ if (!(netdev->features & NETIF_F_NTUPLE)) {
+ if (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE) {
+ /* turn off Flow Director, set ATR and reset */
+ if ((adapter->flags & IXGBE_FLAG_RSS_ENABLED) &&
+ !(adapter->flags & IXGBE_FLAG_DCB_ENABLED))
+ adapter->flags |= IXGBE_FLAG_FDIR_HASH_CAPABLE;
+ need_reset = true;
+ }
+ adapter->flags &= ~IXGBE_FLAG_FDIR_PERFECT_CAPABLE;
+ } else if (!(adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE)) {
+ /* turn off ATR, enable perfect filters and reset */
+ adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;
+ adapter->flags |= IXGBE_FLAG_FDIR_PERFECT_CAPABLE;
+ need_reset = true;
+ }
+
+#endif /* ETHTOOL_GRXRINGS */
+ if (need_reset)
+ ixgbe_do_reset(netdev);
+
+ return 0;
+}
+
+#endif /* ETHTOOL_GFLAGS */
+#endif /* HAVE_NDO_SET_FEATURES */
+#ifdef ETHTOOL_GRXRINGS
+static int ixgbe_get_ethtool_fdir_entry(struct ixgbe_adapter *adapter,
+ struct ethtool_rxnfc *cmd)
+{
+ union ixgbe_atr_input *mask = &adapter->fdir_mask;
+ struct ethtool_rx_flow_spec *fsp =
+ (struct ethtool_rx_flow_spec *)&cmd->fs;
+ struct hlist_node *node, *node2;
+ struct ixgbe_fdir_filter *rule = NULL;
+
+ /* report total rule count */
+ cmd->data = (1024 << adapter->fdir_pballoc) - 2;
+
+ hlist_for_each_entry_safe(rule, node, node2,
+ &adapter->fdir_filter_list, fdir_node) {
+ if (fsp->location <= rule->sw_idx)
+ break;
+ }
+
+ if (!rule || fsp->location != rule->sw_idx)
+ return -EINVAL;
+
+ /* fill out the flow spec entry */
+
+ /* set flow type field */
+ switch (rule->filter.formatted.flow_type) {
+ case IXGBE_ATR_FLOW_TYPE_TCPV4:
+ fsp->flow_type = TCP_V4_FLOW;
+ break;
+ case IXGBE_ATR_FLOW_TYPE_UDPV4:
+ fsp->flow_type = UDP_V4_FLOW;
+ break;
+ case IXGBE_ATR_FLOW_TYPE_SCTPV4:
+ fsp->flow_type = SCTP_V4_FLOW;
+ break;
+ case IXGBE_ATR_FLOW_TYPE_IPV4:
+ fsp->flow_type = IP_USER_FLOW;
+ fsp->h_u.usr_ip4_spec.ip_ver = ETH_RX_NFC_IP4;
+ fsp->h_u.usr_ip4_spec.proto = 0;
+ fsp->m_u.usr_ip4_spec.proto = 0;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ fsp->h_u.tcp_ip4_spec.psrc = rule->filter.formatted.src_port;
+ fsp->m_u.tcp_ip4_spec.psrc = mask->formatted.src_port;
+ fsp->h_u.tcp_ip4_spec.pdst = rule->filter.formatted.dst_port;
+ fsp->m_u.tcp_ip4_spec.pdst = mask->formatted.dst_port;
+ fsp->h_u.tcp_ip4_spec.ip4src = rule->filter.formatted.src_ip[0];
+ fsp->m_u.tcp_ip4_spec.ip4src = mask->formatted.src_ip[0];
+ fsp->h_u.tcp_ip4_spec.ip4dst = rule->filter.formatted.dst_ip[0];
+ fsp->m_u.tcp_ip4_spec.ip4dst = mask->formatted.dst_ip[0];
+ fsp->h_ext.vlan_tci = rule->filter.formatted.vlan_id;
+ fsp->m_ext.vlan_tci = mask->formatted.vlan_id;
+ fsp->h_ext.vlan_etype = rule->filter.formatted.flex_bytes;
+ fsp->m_ext.vlan_etype = mask->formatted.flex_bytes;
+ fsp->h_ext.data[1] = htonl(rule->filter.formatted.vm_pool);
+ fsp->m_ext.data[1] = htonl(mask->formatted.vm_pool);
+ fsp->flow_type |= FLOW_EXT;
+
+ /* record action */
+ if (rule->action == IXGBE_FDIR_DROP_QUEUE)
+ fsp->ring_cookie = RX_CLS_FLOW_DISC;
+ else
+ fsp->ring_cookie = rule->action;
+
+ return 0;
+}
+
+static int ixgbe_get_ethtool_fdir_all(struct ixgbe_adapter *adapter,
+ struct ethtool_rxnfc *cmd,
+ u32 *rule_locs)
+{
+ struct hlist_node *node, *node2;
+ struct ixgbe_fdir_filter *rule;
+ int cnt = 0;
+
+ /* report total rule count */
+ cmd->data = (1024 << adapter->fdir_pballoc) - 2;
+
+ hlist_for_each_entry_safe(rule, node, node2,
+ &adapter->fdir_filter_list, fdir_node) {
+ if (cnt == cmd->rule_cnt)
+ return -EMSGSIZE;
+ rule_locs[cnt] = rule->sw_idx;
+ cnt++;
+ }
+
+ cmd->rule_cnt = cnt;
+
+ return 0;
+}
+
+static int ixgbe_get_rss_hash_opts(struct ixgbe_adapter *adapter,
+ struct ethtool_rxnfc *cmd)
+{
+ cmd->data = 0;
+
+ /* if RSS is disabled then report no hashing */
+ if (!(adapter->flags & IXGBE_FLAG_RSS_ENABLED))
+ return 0;
+
+ /* Report default options for RSS on ixgbe */
+ switch (cmd->flow_type) {
+ case TCP_V4_FLOW:
+ cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+ case UDP_V4_FLOW:
+ if (adapter->flags2 & IXGBE_FLAG2_RSS_FIELD_IPV4_UDP)
+ cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+ case SCTP_V4_FLOW:
+ case AH_ESP_V4_FLOW:
+ case AH_V4_FLOW:
+ case ESP_V4_FLOW:
+ case IPV4_FLOW:
+ cmd->data |= RXH_IP_SRC | RXH_IP_DST;
+ break;
+ case TCP_V6_FLOW:
+ cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+ case UDP_V6_FLOW:
+ if (adapter->flags2 & IXGBE_FLAG2_RSS_FIELD_IPV6_UDP)
+ cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+ case SCTP_V6_FLOW:
+ case AH_ESP_V6_FLOW:
+ case AH_V6_FLOW:
+ case ESP_V6_FLOW:
+ case IPV6_FLOW:
+ cmd->data |= RXH_IP_SRC | RXH_IP_DST;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int ixgbe_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
+#ifdef HAVE_ETHTOOL_GET_RXNFC_VOID_RULE_LOCS
+ void *rule_locs)
+#else
+ u32 *rule_locs)
+#endif
+{
+ struct ixgbe_adapter *adapter = netdev_priv(dev);
+ int ret = -EOPNOTSUPP;
+
+ switch (cmd->cmd) {
+ case ETHTOOL_GRXRINGS:
+ cmd->data = adapter->num_rx_queues;
+ ret = 0;
+ break;
+ case ETHTOOL_GRXCLSRLCNT:
+ cmd->rule_cnt = adapter->fdir_filter_count;
+ ret = 0;
+ break;
+ case ETHTOOL_GRXCLSRULE:
+ ret = ixgbe_get_ethtool_fdir_entry(adapter, cmd);
+ break;
+ case ETHTOOL_GRXCLSRLALL:
+ ret = ixgbe_get_ethtool_fdir_all(adapter, cmd,
+ (u32 *)rule_locs);
+ break;
+ case ETHTOOL_GRXFH:
+ ret = ixgbe_get_rss_hash_opts(adapter, cmd);
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
+static int ixgbe_update_ethtool_fdir_entry(struct ixgbe_adapter *adapter,
+ struct ixgbe_fdir_filter *input,
+ u16 sw_idx)
+{
+ struct ixgbe_hw *hw = &adapter->hw;
+ struct hlist_node *node, *node2, *parent;
+ struct ixgbe_fdir_filter *rule;
+ int err = -EINVAL;
+
+ parent = NULL;
+ rule = NULL;
+
+ hlist_for_each_entry_safe(rule, node, node2,
+ &adapter->fdir_filter_list, fdir_node) {
+ /* hash found, or no matching entry */
+ if (rule->sw_idx >= sw_idx)
+ break;
+ parent = node;
+ }
+
+ /* if there is an old rule occupying our place remove it */
+ if (rule && (rule->sw_idx == sw_idx)) {
+ if (!input || (rule->filter.formatted.bkt_hash !=
+ input->filter.formatted.bkt_hash)) {
+ err = ixgbe_fdir_erase_perfect_filter_82599(hw,
+ &rule->filter,
+ sw_idx);
+ }
+
+ hlist_del(&rule->fdir_node);
+ kfree(rule);
+ adapter->fdir_filter_count--;
+ }
+
+ /*
+ * If no input this was a delete, err should be 0 if a rule was
+ * successfully found and removed from the list else -EINVAL
+ */
+ if (!input)
+ return err;
+
+ /* initialize node and set software index */
+ INIT_HLIST_NODE(&input->fdir_node);
+
+ /* add filter to the list */
+ if (parent)
+ hlist_add_after(parent, &input->fdir_node);
+ else
+ hlist_add_head(&input->fdir_node,
+ &adapter->fdir_filter_list);
+
+ /* update counts */
+ adapter->fdir_filter_count++;
+
+ return 0;
+}
+
+static int ixgbe_flowspec_to_flow_type(struct ethtool_rx_flow_spec *fsp,
+ u8 *flow_type)
+{
+ switch (fsp->flow_type & ~FLOW_EXT) {
+ case TCP_V4_FLOW:
+ *flow_type = IXGBE_ATR_FLOW_TYPE_TCPV4;
+ break;
+ case UDP_V4_FLOW:
+ *flow_type = IXGBE_ATR_FLOW_TYPE_UDPV4;
+ break;
+ case SCTP_V4_FLOW:
+ *flow_type = IXGBE_ATR_FLOW_TYPE_SCTPV4;
+ break;
+ case IP_USER_FLOW:
+ switch (fsp->h_u.usr_ip4_spec.proto) {
+ case IPPROTO_TCP:
+ *flow_type = IXGBE_ATR_FLOW_TYPE_TCPV4;
+ break;
+ case IPPROTO_UDP:
+ *flow_type = IXGBE_ATR_FLOW_TYPE_UDPV4;
+ break;
+ case IPPROTO_SCTP:
+ *flow_type = IXGBE_ATR_FLOW_TYPE_SCTPV4;
+ break;
+ case 0:
+ if (!fsp->m_u.usr_ip4_spec.proto) {
+ *flow_type = IXGBE_ATR_FLOW_TYPE_IPV4;
+ break;
+ }
+ default:
+ return 0;
+ }
+ break;
+ default:
+ return 0;
+ }
+
+ return 1;
+}
+
+static int ixgbe_add_ethtool_fdir_entry(struct ixgbe_adapter *adapter,
+ struct ethtool_rxnfc *cmd)
+{
+ struct ethtool_rx_flow_spec *fsp =
+ (struct ethtool_rx_flow_spec *)&cmd->fs;
+ struct ixgbe_hw *hw = &adapter->hw;
+ struct ixgbe_fdir_filter *input;
+ union ixgbe_atr_input mask;
+ int err;
+
+ if (!(adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE))
+ return -EOPNOTSUPP;
+
+ /*
+ * Don't allow programming if the action is a queue greater than
+ * the number of online Rx queues.
+ */
+ if ((fsp->ring_cookie != RX_CLS_FLOW_DISC) &&
+ (fsp->ring_cookie >= adapter->num_rx_queues))
+ return -EINVAL;
+
+ /* Don't allow indexes to exist outside of available space */
+ if (fsp->location >= ((1024 << adapter->fdir_pballoc) - 2)) {
+ e_err(drv, "Location out of range\n");
+ return -EINVAL;
+ }
+
+ input = kzalloc(sizeof(*input), GFP_ATOMIC);
+ if (!input)
+ return -ENOMEM;
+
+ memset(&mask, 0, sizeof(union ixgbe_atr_input));
+
+ /* set SW index */
+ input->sw_idx = fsp->location;
+
+ /* record flow type */
+ if (!ixgbe_flowspec_to_flow_type(fsp,
+ &input->filter.formatted.flow_type)) {
+ e_err(drv, "Unrecognized flow type\n");
+ goto err_out;
+ }
+
+ mask.formatted.flow_type = IXGBE_ATR_L4TYPE_IPV6_MASK |
+ IXGBE_ATR_L4TYPE_MASK;
+
+ if (input->filter.formatted.flow_type == IXGBE_ATR_FLOW_TYPE_IPV4)
+ mask.formatted.flow_type &= IXGBE_ATR_L4TYPE_IPV6_MASK;
+
+ /* Copy input into formatted structures */
+ input->filter.formatted.src_ip[0] = fsp->h_u.tcp_ip4_spec.ip4src;
+ mask.formatted.src_ip[0] = fsp->m_u.tcp_ip4_spec.ip4src;
+ input->filter.formatted.dst_ip[0] = fsp->h_u.tcp_ip4_spec.ip4dst;
+ mask.formatted.dst_ip[0] = fsp->m_u.tcp_ip4_spec.ip4dst;
+ input->filter.formatted.src_port = fsp->h_u.tcp_ip4_spec.psrc;
+ mask.formatted.src_port = fsp->m_u.tcp_ip4_spec.psrc;
+ input->filter.formatted.dst_port = fsp->h_u.tcp_ip4_spec.pdst;
+ mask.formatted.dst_port = fsp->m_u.tcp_ip4_spec.pdst;
+
+ if (fsp->flow_type & FLOW_EXT) {
+ input->filter.formatted.vm_pool =
+ (unsigned char)ntohl(fsp->h_ext.data[1]);
+ mask.formatted.vm_pool =
+ (unsigned char)ntohl(fsp->m_ext.data[1]);
+ input->filter.formatted.vlan_id = fsp->h_ext.vlan_tci;
+ mask.formatted.vlan_id = fsp->m_ext.vlan_tci;
+ input->filter.formatted.flex_bytes =
+ fsp->h_ext.vlan_etype;
+ mask.formatted.flex_bytes = fsp->m_ext.vlan_etype;
+ }
+
+ /* determine if we need to drop or route the packet */
+ if (fsp->ring_cookie == RX_CLS_FLOW_DISC)
+ input->action = IXGBE_FDIR_DROP_QUEUE;
+ else
+ input->action = fsp->ring_cookie;
+
+ spin_lock(&adapter->fdir_perfect_lock);
+
+ if (hlist_empty(&adapter->fdir_filter_list)) {
+ /* save mask and program input mask into HW */
+ memcpy(&adapter->fdir_mask, &mask, sizeof(mask));
+ err = ixgbe_fdir_set_input_mask_82599(hw, &mask);
+ if (err) {
+ e_err(drv, "Error writing mask\n");
+ goto err_out_w_lock;
+ }
+ } else if (memcmp(&adapter->fdir_mask, &mask, sizeof(mask))) {
+ e_err(drv, "Only one mask supported per port\n");
+ goto err_out_w_lock;
+ }
+
+ /* apply mask and compute/store hash */
+ ixgbe_atr_compute_perfect_hash_82599(&input->filter, &mask);
+
+ /* program filters to filter memory */
+ err = ixgbe_fdir_write_perfect_filter_82599(hw,
+ &input->filter, input->sw_idx,
+ (input->action == IXGBE_FDIR_DROP_QUEUE) ?
+ IXGBE_FDIR_DROP_QUEUE :
+ adapter->rx_ring[input->action]->reg_idx);
+ if (err)
+ goto err_out_w_lock;
+
+ ixgbe_update_ethtool_fdir_entry(adapter, input, input->sw_idx);
+
+ spin_unlock(&adapter->fdir_perfect_lock);
+
+ kfree(input);
+ return err;
+err_out_w_lock:
+ spin_unlock(&adapter->fdir_perfect_lock);
+err_out:
+ kfree(input);
+ return -EINVAL;
+}
+
+static int ixgbe_del_ethtool_fdir_entry(struct ixgbe_adapter *adapter,
+ struct ethtool_rxnfc *cmd)
+{
+ struct ethtool_rx_flow_spec *fsp =
+ (struct ethtool_rx_flow_spec *)&cmd->fs;
+ int err;
+
+ spin_lock(&adapter->fdir_perfect_lock);
+ err = ixgbe_update_ethtool_fdir_entry(adapter, NULL, (u16)(fsp->location));
+ spin_unlock(&adapter->fdir_perfect_lock);
+
+ return err;
+}
+
+#ifdef ETHTOOL_SRXNTUPLE
+/*
+ * We need to keep this around for kernels 2.6.33 - 2.6.39 in order to avoid
+ * a null pointer dereference as it was assumend if the NETIF_F_NTUPLE flag
+ * was defined that this function was present.
+ */
+static int ixgbe_set_rx_ntuple(struct net_device *dev,
+ struct ethtool_rx_ntuple *cmd)
+{
+ return -EOPNOTSUPP;
+}
+
+#endif
+#define UDP_RSS_FLAGS (IXGBE_FLAG2_RSS_FIELD_IPV4_UDP | \
+ IXGBE_FLAG2_RSS_FIELD_IPV6_UDP)
+static int ixgbe_set_rss_hash_opt(struct ixgbe_adapter *adapter,
+ struct ethtool_rxnfc *nfc)
+{
+ u32 flags2 = adapter->flags2;
+
+ /*
+ * RSS does not support anything other than hashing
+ * to queues on src and dst IPs and ports
+ */
+ if (nfc->data & ~(RXH_IP_SRC | RXH_IP_DST |
+ RXH_L4_B_0_1 | RXH_L4_B_2_3))
+ return -EINVAL;
+
+ switch (nfc->flow_type) {
+ case TCP_V4_FLOW:
+ case TCP_V6_FLOW:
+ if (!(nfc->data & RXH_IP_SRC) ||
+ !(nfc->data & RXH_IP_DST) ||
+ !(nfc->data & RXH_L4_B_0_1) ||
+ !(nfc->data & RXH_L4_B_2_3))
+ return -EINVAL;
+ break;
+ case UDP_V4_FLOW:
+ if (!(nfc->data & RXH_IP_SRC) ||
+ !(nfc->data & RXH_IP_DST))
+ return -EINVAL;
+ switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
+ case 0:
+ flags2 &= ~IXGBE_FLAG2_RSS_FIELD_IPV4_UDP;
+ break;
+ case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
+ flags2 |= IXGBE_FLAG2_RSS_FIELD_IPV4_UDP;
+ break;
+ default:
+ return -EINVAL;
+ }
+ break;
+ case UDP_V6_FLOW:
+ if (!(nfc->data & RXH_IP_SRC) ||
+ !(nfc->data & RXH_IP_DST))
+ return -EINVAL;
+ switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
+ case 0:
+ flags2 &= ~IXGBE_FLAG2_RSS_FIELD_IPV6_UDP;
+ break;
+ case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
+ flags2 |= IXGBE_FLAG2_RSS_FIELD_IPV6_UDP;
+ break;
+ default:
+ return -EINVAL;
+ }
+ break;
+ case AH_ESP_V4_FLOW:
+ case AH_V4_FLOW:
+ case ESP_V4_FLOW:
+ case SCTP_V4_FLOW:
+ case AH_ESP_V6_FLOW:
+ case AH_V6_FLOW:
+ case ESP_V6_FLOW:
+ case SCTP_V6_FLOW:
+ if (!(nfc->data & RXH_IP_SRC) ||
+ !(nfc->data & RXH_IP_DST) ||
+ (nfc->data & RXH_L4_B_0_1) ||
+ (nfc->data & RXH_L4_B_2_3))
+ return -EINVAL;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ /* if we changed something we need to update flags */
+ if (flags2 != adapter->flags2) {
+ struct ixgbe_hw *hw = &adapter->hw;
+ u32 mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
+
+ if ((flags2 & UDP_RSS_FLAGS) &&
+ !(adapter->flags2 & UDP_RSS_FLAGS))
+ e_warn(drv, "enabling UDP RSS: fragmented packets"
+ " may arrive out of order to the stack above\n");
+
+ adapter->flags2 = flags2;
+
+ /* Perform hash on these packet types */
+ mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4
+ | IXGBE_MRQC_RSS_FIELD_IPV4_TCP
+ | IXGBE_MRQC_RSS_FIELD_IPV6
+ | IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
+
+ mrqc &= ~(IXGBE_MRQC_RSS_FIELD_IPV4_UDP |
+ IXGBE_MRQC_RSS_FIELD_IPV6_UDP);
+
+ if (flags2 & IXGBE_FLAG2_RSS_FIELD_IPV4_UDP)
+ mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
+
+ if (flags2 & IXGBE_FLAG2_RSS_FIELD_IPV6_UDP)
+ mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
+
+ IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
+ }
+
+ return 0;
+}
+
+static int ixgbe_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(dev);
+ int ret = -EOPNOTSUPP;
+
+ switch (cmd->cmd) {
+ case ETHTOOL_SRXCLSRLINS:
+ ret = ixgbe_add_ethtool_fdir_entry(adapter, cmd);
+ break;
+ case ETHTOOL_SRXCLSRLDEL:
+ ret = ixgbe_del_ethtool_fdir_entry(adapter, cmd);
+ break;
+ case ETHTOOL_SRXFH:
+ ret = ixgbe_set_rss_hash_opt(adapter, cmd);
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
+#endif /* ETHTOOL_GRXRINGS */
+//static
+struct ethtool_ops ixgbe_ethtool_ops = {
+ .get_settings = ixgbe_get_settings,
+ .set_settings = ixgbe_set_settings,
+ .get_drvinfo = ixgbe_get_drvinfo,
+ .get_regs_len = ixgbe_get_regs_len,
+ .get_regs = ixgbe_get_regs,
+ .get_wol = ixgbe_get_wol,
+ .set_wol = ixgbe_set_wol,
+ .nway_reset = ixgbe_nway_reset,
+ .get_link = ethtool_op_get_link,
+ .get_eeprom_len = ixgbe_get_eeprom_len,
+ .get_eeprom = ixgbe_get_eeprom,
+ .set_eeprom = ixgbe_set_eeprom,
+ .get_ringparam = ixgbe_get_ringparam,
+ .set_ringparam = ixgbe_set_ringparam,
+ .get_pauseparam = ixgbe_get_pauseparam,
+ .set_pauseparam = ixgbe_set_pauseparam,
+ .get_msglevel = ixgbe_get_msglevel,
+ .set_msglevel = ixgbe_set_msglevel,
+#ifndef HAVE_ETHTOOL_GET_SSET_COUNT
+ .self_test_count = ixgbe_diag_test_count,
+#endif /* HAVE_ETHTOOL_GET_SSET_COUNT */
+ .self_test = ixgbe_diag_test,
+ .get_strings = ixgbe_get_strings,
+#ifdef HAVE_ETHTOOL_SET_PHYS_ID
+ .set_phys_id = ixgbe_set_phys_id,
+#else
+ .phys_id = ixgbe_phys_id,
+#endif /* HAVE_ETHTOOL_SET_PHYS_ID */
+#ifndef HAVE_ETHTOOL_GET_SSET_COUNT
+ .get_stats_count = ixgbe_get_stats_count,
+#else /* HAVE_ETHTOOL_GET_SSET_COUNT */
+ .get_sset_count = ixgbe_get_sset_count,
+#endif /* HAVE_ETHTOOL_GET_SSET_COUNT */
+ .get_ethtool_stats = ixgbe_get_ethtool_stats,
+#ifdef HAVE_ETHTOOL_GET_PERM_ADDR
+ .get_perm_addr = ethtool_op_get_perm_addr,
+#endif
+ .get_coalesce = ixgbe_get_coalesce,
+ .set_coalesce = ixgbe_set_coalesce,
+#ifndef HAVE_NDO_SET_FEATURES
+ .get_rx_csum = ixgbe_get_rx_csum,
+ .set_rx_csum = ixgbe_set_rx_csum,
+ .get_tx_csum = ixgbe_get_tx_csum,
+ .set_tx_csum = ixgbe_set_tx_csum,
+ .get_sg = ethtool_op_get_sg,
+ .set_sg = ethtool_op_set_sg,
+#ifdef NETIF_F_TSO
+ .get_tso = ethtool_op_get_tso,
+ .set_tso = ixgbe_set_tso,
+#endif
+#ifdef ETHTOOL_GFLAGS
+ .get_flags = ethtool_op_get_flags,
+ .set_flags = ixgbe_set_flags,
+#endif
+#endif /* HAVE_NDO_SET_FEATURES */
+#ifdef ETHTOOL_GRXRINGS
+ .get_rxnfc = ixgbe_get_rxnfc,
+ .set_rxnfc = ixgbe_set_rxnfc,
+#ifdef ETHTOOL_SRXNTUPLE
+ .set_rx_ntuple = ixgbe_set_rx_ntuple,
+#endif
+#endif
+};
+
+void ixgbe_set_ethtool_ops(struct net_device *netdev)
+{
+ SET_ETHTOOL_OPS(netdev, &ixgbe_ethtool_ops);
+}
+#endif /* SIOCETHTOOL */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_fcoe.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_fcoe.h
new file mode 100644
index 00000000..cad28622
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_fcoe.h
@@ -0,0 +1,91 @@
+/*******************************************************************************
+
+ Intel 10 Gigabit PCI Express Linux driver
+ Copyright(c) 1999 - 2012 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _IXGBE_FCOE_H
+#define _IXGBE_FCOE_H
+
+#ifdef IXGBE_FCOE
+
+#include <scsi/fc/fc_fs.h>
+#include <scsi/fc/fc_fcoe.h>
+
+/* shift bits within STAT fo FCSTAT */
+#define IXGBE_RXDADV_FCSTAT_SHIFT 4
+
+/* ddp user buffer */
+#define IXGBE_BUFFCNT_MAX 256 /* 8 bits bufcnt */
+#define IXGBE_FCPTR_ALIGN 16
+#define IXGBE_FCPTR_MAX (IXGBE_BUFFCNT_MAX * sizeof(dma_addr_t))
+#define IXGBE_FCBUFF_4KB 0x0
+#define IXGBE_FCBUFF_8KB 0x1
+#define IXGBE_FCBUFF_16KB 0x2
+#define IXGBE_FCBUFF_64KB 0x3
+#define IXGBE_FCBUFF_MAX 65536 /* 64KB max */
+#define IXGBE_FCBUFF_MIN 4096 /* 4KB min */
+#define IXGBE_FCOE_DDP_MAX 512 /* 9 bits xid */
+
+/* Default traffic class to use for FCoE */
+#define IXGBE_FCOE_DEFTC 3
+
+/* fcerr */
+#define IXGBE_FCERR_BADCRC 0x00100000
+#define IXGBE_FCERR_EOFSOF 0x00200000
+#define IXGBE_FCERR_NOFIRST 0x00300000
+#define IXGBE_FCERR_OOOSEQ 0x00400000
+#define IXGBE_FCERR_NODMA 0x00500000
+#define IXGBE_FCERR_PKTLOST 0x00600000
+
+/* FCoE DDP for target mode */
+#define __IXGBE_FCOE_TARGET 1
+
+struct ixgbe_fcoe_ddp {
+ int len;
+ u32 err;
+ unsigned int sgc;
+ struct scatterlist *sgl;
+ dma_addr_t udp;
+ u64 *udl;
+ struct pci_pool *pool;
+};
+
+struct ixgbe_fcoe {
+ struct pci_pool **pool;
+ atomic_t refcnt;
+ spinlock_t lock;
+ struct ixgbe_fcoe_ddp ddp[IXGBE_FCOE_DDP_MAX];
+ unsigned char *extra_ddp_buffer;
+ dma_addr_t extra_ddp_buffer_dma;
+ u64 __percpu *pcpu_noddp;
+ u64 __percpu *pcpu_noddp_ext_buff;
+ unsigned long mode;
+ u8 tc;
+ u8 up;
+ u8 up_set;
+};
+#endif /* IXGBE_FCOE */
+
+#endif /* _IXGBE_FCOE_H */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c
new file mode 100644
index 00000000..8c1d2fe3
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c
@@ -0,0 +1,2970 @@
+/*******************************************************************************
+
+ Intel 10 Gigabit PCI Express Linux driver
+ Copyright(c) 1999 - 2012 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+/******************************************************************************
+ Copyright (c)2006 - 2007 Myricom, Inc. for some LRO specific code
+******************************************************************************/
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/vmalloc.h>
+#include <linux/highmem.h>
+#include <linux/string.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#ifdef HAVE_SCTP
+#include <linux/sctp.h>
+#endif
+#include <linux/pkt_sched.h>
+#include <linux/ipv6.h>
+#ifdef NETIF_F_TSO
+#include <net/checksum.h>
+#ifdef NETIF_F_TSO6
+#include <net/ip6_checksum.h>
+#endif
+#endif
+#ifdef SIOCETHTOOL
+#include <linux/ethtool.h>
+#endif
+
+#include "ixgbe.h"
+
+#undef CONFIG_DCA
+#undef CONFIG_DCA_MODULE
+
+char ixgbe_driver_name[] = "ixgbe";
+static const char ixgbe_driver_string[] =
+ "Intel(R) 10 Gigabit PCI Express Network Driver";
+#define DRV_HW_PERF
+
+#ifndef CONFIG_IXGBE_NAPI
+#define DRIVERNAPI
+#else
+#define DRIVERNAPI "-NAPI"
+#endif
+
+#define FPGA
+
+#define VMDQ_TAG
+
+#define MAJ 3
+#define MIN 9
+#define BUILD 17
+#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
+ __stringify(BUILD) DRIVERNAPI DRV_HW_PERF FPGA VMDQ_TAG
+const char ixgbe_driver_version[] = DRV_VERSION;
+static const char ixgbe_copyright[] =
+ "Copyright (c) 1999-2012 Intel Corporation.";
+
+/* ixgbe_pci_tbl - PCI Device ID Table
+ *
+ * Wildcard entries (PCI_ANY_ID) should come last
+ * Last entry must be all 0s
+ *
+ * { Vendor ID, Device ID, SubVendor ID, SubDevice ID,
+ * Class, Class Mask, private data (not used) }
+ */
+DEFINE_PCI_DEVICE_TABLE(ixgbe_pci_tbl) = {
+ {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598)},
+ {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598AF_DUAL_PORT)},
+ {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598AF_SINGLE_PORT)},
+ {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598AT)},
+ {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598AT2)},
+ {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598EB_CX4)},
+ {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598_CX4_DUAL_PORT)},
+ {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598_DA_DUAL_PORT)},
+ {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM)},
+ {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598EB_XF_LR)},
+ {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598EB_SFP_LOM)},
+ {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598_BX)},
+ {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_KX4)},
+ {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_XAUI_LOM)},
+ {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_KR)},
+ {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_SFP)},
+ {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_SFP_EM)},
+ {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_KX4_MEZZ)},
+ {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_CX4)},
+ {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_BACKPLANE_FCOE)},
+ {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_SFP_FCOE)},
+ {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_T3_LOM)},
+ {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_COMBO_BACKPLANE)},
+ {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X540T)},
+ {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_SFP_SF2)},
+ {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_LS)},
+ {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599EN_SFP)},
+ {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_QSFP_SF_QP)},
+ /* required last entry */
+ {0, }
+};
+
+#if defined(CONFIG_DCA) || defined(CONFIG_DCA_MODULE)
+static int ixgbe_notify_dca(struct notifier_block *, unsigned long event,
+ void *p);
+static struct notifier_block dca_notifier = {
+ .notifier_call = ixgbe_notify_dca,
+ .next = NULL,
+ .priority = 0
+};
+
+#endif
+MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
+MODULE_DESCRIPTION("Intel(R) 10 Gigabit PCI Express Network Driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRV_VERSION);
+
+#define DEFAULT_DEBUG_LEVEL_SHIFT 3
+
+
+static void ixgbe_release_hw_control(struct ixgbe_adapter *adapter)
+{
+ u32 ctrl_ext;
+
+ /* Let firmware take over control of h/w */
+ ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT);
+ IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT,
+ ctrl_ext & ~IXGBE_CTRL_EXT_DRV_LOAD);
+}
+
+#ifdef NO_VNIC
+static void ixgbe_get_hw_control(struct ixgbe_adapter *adapter)
+{
+ u32 ctrl_ext;
+
+ /* Let firmware know the driver has taken over */
+ ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT);
+ IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT,
+ ctrl_ext | IXGBE_CTRL_EXT_DRV_LOAD);
+}
+#endif
+
+
+static void ixgbe_update_xoff_rx_lfc(struct ixgbe_adapter *adapter)
+{
+ struct ixgbe_hw *hw = &adapter->hw;
+ struct ixgbe_hw_stats *hwstats = &adapter->stats;
+ int i;
+ u32 data;
+
+ if ((hw->fc.current_mode != ixgbe_fc_full) &&
+ (hw->fc.current_mode != ixgbe_fc_rx_pause))
+ return;
+
+ switch (hw->mac.type) {
+ case ixgbe_mac_82598EB:
+ data = IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
+ break;
+ default:
+ data = IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
+ }
+ hwstats->lxoffrxc += data;
+
+ /* refill credits (no tx hang) if we received xoff */
+ if (!data)
+ return;
+
+ for (i = 0; i < adapter->num_tx_queues; i++)
+ clear_bit(__IXGBE_HANG_CHECK_ARMED,
+ &adapter->tx_ring[i]->state);
+}
+
+static void ixgbe_update_xoff_received(struct ixgbe_adapter *adapter)
+{
+ struct ixgbe_hw *hw = &adapter->hw;
+ struct ixgbe_hw_stats *hwstats = &adapter->stats;
+ u32 xoff[8] = {0};
+ int i;
+ bool pfc_en = adapter->dcb_cfg.pfc_mode_enable;
+
+#ifdef HAVE_DCBNL_IEEE
+ if (adapter->ixgbe_ieee_pfc)
+ pfc_en |= !!(adapter->ixgbe_ieee_pfc->pfc_en);
+
+#endif
+ if (!(adapter->flags & IXGBE_FLAG_DCB_ENABLED) || !pfc_en) {
+ ixgbe_update_xoff_rx_lfc(adapter);
+ return;
+ }
+
+ /* update stats for each tc, only valid with PFC enabled */
+ for (i = 0; i < MAX_TX_PACKET_BUFFERS; i++) {
+ switch (hw->mac.type) {
+ case ixgbe_mac_82598EB:
+ xoff[i] = IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i));
+ break;
+ default:
+ xoff[i] = IXGBE_READ_REG(hw, IXGBE_PXOFFRXCNT(i));
+ }
+ hwstats->pxoffrxc[i] += xoff[i];
+ }
+
+ /* disarm tx queues that have received xoff frames */
+ for (i = 0; i < adapter->num_tx_queues; i++) {
+ struct ixgbe_ring *tx_ring = adapter->tx_ring[i];
+ u8 tc = tx_ring->dcb_tc;
+
+ if ((tc <= 7) && (xoff[tc]))
+ clear_bit(__IXGBE_HANG_CHECK_ARMED, &tx_ring->state);
+ }
+}
+
+
+
+
+#define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
+
+
+
+
+#ifdef HAVE_8021P_SUPPORT
+/**
+ * ixgbe_vlan_stripping_disable - helper to disable vlan tag stripping
+ * @adapter: driver data
+ */
+void ixgbe_vlan_stripping_disable(struct ixgbe_adapter *adapter)
+{
+ struct ixgbe_hw *hw = &adapter->hw;
+ u32 vlnctrl;
+ int i;
+
+ /* leave vlan tag stripping enabled for DCB */
+ if (adapter->flags & IXGBE_FLAG_DCB_ENABLED)
+ return;
+
+ switch (hw->mac.type) {
+ case ixgbe_mac_82598EB:
+ vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
+ vlnctrl &= ~IXGBE_VLNCTRL_VME;
+ IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
+ break;
+ case ixgbe_mac_82599EB:
+ case ixgbe_mac_X540:
+ for (i = 0; i < adapter->num_rx_queues; i++) {
+ u8 reg_idx = adapter->rx_ring[i]->reg_idx;
+ vlnctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(reg_idx));
+ vlnctrl &= ~IXGBE_RXDCTL_VME;
+ IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(reg_idx), vlnctrl);
+ }
+ break;
+ default:
+ break;
+ }
+}
+
+#endif
+/**
+ * ixgbe_vlan_stripping_enable - helper to enable vlan tag stripping
+ * @adapter: driver data
+ */
+void ixgbe_vlan_stripping_enable(struct ixgbe_adapter *adapter)
+{
+ struct ixgbe_hw *hw = &adapter->hw;
+ u32 vlnctrl;
+ int i;
+
+ switch (hw->mac.type) {
+ case ixgbe_mac_82598EB:
+ vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
+ vlnctrl |= IXGBE_VLNCTRL_VME;
+ IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
+ break;
+ case ixgbe_mac_82599EB:
+ case ixgbe_mac_X540:
+ for (i = 0; i < adapter->num_rx_queues; i++) {
+ u8 reg_idx = adapter->rx_ring[i]->reg_idx;
+ vlnctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(reg_idx));
+ vlnctrl |= IXGBE_RXDCTL_VME;
+ IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(reg_idx), vlnctrl);
+ }
+ break;
+ default:
+ break;
+ }
+}
+
+#ifdef HAVE_VLAN_RX_REGISTER
+void ixgbe_vlan_mode(struct net_device *netdev, struct vlan_group *grp)
+#else
+void ixgbe_vlan_mode(struct net_device *netdev, u32 features)
+#endif
+{
+ struct ixgbe_adapter *adapter = netdev_priv(netdev);
+#ifdef HAVE_8021P_SUPPORT
+ bool enable;
+#endif
+#ifdef HAVE_VLAN_RX_REGISTER
+
+ //if (!test_bit(__IXGBE_DOWN, &adapter->state))
+ // ixgbe_irq_disable(adapter);
+
+ adapter->vlgrp = grp;
+
+ //if (!test_bit(__IXGBE_DOWN, &adapter->state))
+ // ixgbe_irq_enable(adapter, true, true);
+#endif
+#ifdef HAVE_8021P_SUPPORT
+#ifdef HAVE_VLAN_RX_REGISTER
+ enable = (grp || (adapter->flags & IXGBE_FLAG_DCB_ENABLED));
+#else
+ enable = !!(features & NETIF_F_HW_VLAN_RX);
+#endif
+ if (enable)
+ /* enable VLAN tag insert/strip */
+ ixgbe_vlan_stripping_enable(adapter);
+ else
+ /* disable VLAN tag insert/strip */
+ ixgbe_vlan_stripping_disable(adapter);
+
+#endif
+}
+
+static u8 *ixgbe_addr_list_itr(struct ixgbe_hw *hw, u8 **mc_addr_ptr, u32 *vmdq)
+{
+#ifdef NETDEV_HW_ADDR_T_MULTICAST
+ struct netdev_hw_addr *mc_ptr;
+#else
+ struct dev_mc_list *mc_ptr;
+#endif
+ struct ixgbe_adapter *adapter = hw->back;
+ u8 *addr = *mc_addr_ptr;
+
+ *vmdq = adapter->num_vfs;
+
+#ifdef NETDEV_HW_ADDR_T_MULTICAST
+ mc_ptr = container_of(addr, struct netdev_hw_addr, addr[0]);
+ if (mc_ptr->list.next) {
+ struct netdev_hw_addr *ha;
+
+ ha = list_entry(mc_ptr->list.next, struct netdev_hw_addr, list);
+ *mc_addr_ptr = ha->addr;
+ }
+#else
+ mc_ptr = container_of(addr, struct dev_mc_list, dmi_addr[0]);
+ if (mc_ptr->next)
+ *mc_addr_ptr = mc_ptr->next->dmi_addr;
+#endif
+ else
+ *mc_addr_ptr = NULL;
+
+ return addr;
+}
+
+/**
+ * ixgbe_write_mc_addr_list - write multicast addresses to MTA
+ * @netdev: network interface device structure
+ *
+ * Writes multicast address list to the MTA hash table.
+ * Returns: -ENOMEM on failure
+ * 0 on no addresses written
+ * X on writing X addresses to MTA
+ **/
+int ixgbe_write_mc_addr_list(struct net_device *netdev)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_hw *hw = &adapter->hw;
+#ifdef NETDEV_HW_ADDR_T_MULTICAST
+ struct netdev_hw_addr *ha;
+#endif
+ u8 *addr_list = NULL;
+ int addr_count = 0;
+
+ if (!hw->mac.ops.update_mc_addr_list)
+ return -ENOMEM;
+
+ if (!netif_running(netdev))
+ return 0;
+
+
+ hw->mac.ops.update_mc_addr_list(hw, NULL, 0,
+ ixgbe_addr_list_itr, true);
+
+ if (!netdev_mc_empty(netdev)) {
+#ifdef NETDEV_HW_ADDR_T_MULTICAST
+ ha = list_first_entry(&netdev->mc.list,
+ struct netdev_hw_addr, list);
+ addr_list = ha->addr;
+#else
+ addr_list = netdev->mc_list->dmi_addr;
+#endif
+ addr_count = netdev_mc_count(netdev);
+
+ hw->mac.ops.update_mc_addr_list(hw, addr_list, addr_count,
+ ixgbe_addr_list_itr, false);
+ }
+
+#ifdef CONFIG_PCI_IOV
+ //ixgbe_restore_vf_multicasts(adapter);
+#endif
+ return addr_count;
+}
+
+
+void ixgbe_full_sync_mac_table(struct ixgbe_adapter *adapter)
+{
+ struct ixgbe_hw *hw = &adapter->hw;
+ int i;
+ for (i = 0; i < hw->mac.num_rar_entries; i++) {
+ if (adapter->mac_table[i].state & IXGBE_MAC_STATE_IN_USE) {
+ hw->mac.ops.set_rar(hw, i, adapter->mac_table[i].addr,
+ adapter->mac_table[i].queue,
+ IXGBE_RAH_AV);
+ } else {
+ hw->mac.ops.clear_rar(hw, i);
+ }
+ }
+}
+
+void ixgbe_sync_mac_table(struct ixgbe_adapter *adapter)
+{
+ struct ixgbe_hw *hw = &adapter->hw;
+ int i;
+ for (i = 0; i < hw->mac.num_rar_entries; i++) {
+ if (adapter->mac_table[i].state & IXGBE_MAC_STATE_MODIFIED) {
+ if (adapter->mac_table[i].state &
+ IXGBE_MAC_STATE_IN_USE) {
+ hw->mac.ops.set_rar(hw, i,
+ adapter->mac_table[i].addr,
+ adapter->mac_table[i].queue,
+ IXGBE_RAH_AV);
+ } else {
+ hw->mac.ops.clear_rar(hw, i);
+ }
+ adapter->mac_table[i].state &=
+ ~(IXGBE_MAC_STATE_MODIFIED);
+ }
+ }
+}
+
+int ixgbe_available_rars(struct ixgbe_adapter *adapter)
+{
+ struct ixgbe_hw *hw = &adapter->hw;
+ int i, count = 0;
+
+ for (i = 0; i < hw->mac.num_rar_entries; i++) {
+ if (adapter->mac_table[i].state == 0)
+ count++;
+ }
+ return count;
+}
+
+int ixgbe_add_mac_filter(struct ixgbe_adapter *adapter, u8 *addr, u16 queue)
+{
+ struct ixgbe_hw *hw = &adapter->hw;
+ int i;
+
+ if (is_zero_ether_addr(addr))
+ return 0;
+
+ for (i = 0; i < hw->mac.num_rar_entries; i++) {
+ if (adapter->mac_table[i].state & IXGBE_MAC_STATE_IN_USE)
+ continue;
+ adapter->mac_table[i].state |= (IXGBE_MAC_STATE_MODIFIED |
+ IXGBE_MAC_STATE_IN_USE);
+ memcpy(adapter->mac_table[i].addr, addr, ETH_ALEN);
+ adapter->mac_table[i].queue = queue;
+ ixgbe_sync_mac_table(adapter);
+ return i;
+ }
+ return -ENOMEM;
+}
+
+void ixgbe_flush_sw_mac_table(struct ixgbe_adapter *adapter)
+{
+ int i;
+ struct ixgbe_hw *hw = &adapter->hw;
+
+ for (i = 0; i < hw->mac.num_rar_entries; i++) {
+ adapter->mac_table[i].state |= IXGBE_MAC_STATE_MODIFIED;
+ adapter->mac_table[i].state &= ~IXGBE_MAC_STATE_IN_USE;
+ memset(adapter->mac_table[i].addr, 0, ETH_ALEN);
+ adapter->mac_table[i].queue = 0;
+ }
+ ixgbe_sync_mac_table(adapter);
+}
+
+void ixgbe_del_mac_filter_by_index(struct ixgbe_adapter *adapter, int index)
+{
+ adapter->mac_table[index].state |= IXGBE_MAC_STATE_MODIFIED;
+ adapter->mac_table[index].state &= ~IXGBE_MAC_STATE_IN_USE;
+ memset(adapter->mac_table[index].addr, 0, ETH_ALEN);
+ adapter->mac_table[index].queue = 0;
+ ixgbe_sync_mac_table(adapter);
+}
+
+int ixgbe_del_mac_filter(struct ixgbe_adapter *adapter, u8* addr, u16 queue)
+{
+ /* search table for addr, if found, set to 0 and sync */
+ int i;
+ struct ixgbe_hw *hw = &adapter->hw;
+
+ if (is_zero_ether_addr(addr))
+ return 0;
+ for (i = 0; i < hw->mac.num_rar_entries; i++) {
+ if (ether_addr_equal(addr, adapter->mac_table[i].addr) &&
+ adapter->mac_table[i].queue == queue) {
+ adapter->mac_table[i].state |= IXGBE_MAC_STATE_MODIFIED;
+ adapter->mac_table[i].state &= ~IXGBE_MAC_STATE_IN_USE;
+ memset(adapter->mac_table[i].addr, 0, ETH_ALEN);
+ adapter->mac_table[i].queue = 0;
+ ixgbe_sync_mac_table(adapter);
+ return 0;
+ }
+ }
+ return -ENOMEM;
+}
+#ifdef HAVE_SET_RX_MODE
+/**
+ * ixgbe_write_uc_addr_list - write unicast addresses to RAR table
+ * @netdev: network interface device structure
+ *
+ * Writes unicast address list to the RAR table.
+ * Returns: -ENOMEM on failure/insufficient address space
+ * 0 on no addresses written
+ * X on writing X addresses to the RAR table
+ **/
+int ixgbe_write_uc_addr_list(struct ixgbe_adapter *adapter,
+ struct net_device *netdev, unsigned int vfn)
+{
+ int count = 0;
+
+ /* return ENOMEM indicating insufficient memory for addresses */
+ if (netdev_uc_count(netdev) > ixgbe_available_rars(adapter))
+ return -ENOMEM;
+
+ if (!netdev_uc_empty(netdev)) {
+#ifdef NETDEV_HW_ADDR_T_UNICAST
+ struct netdev_hw_addr *ha;
+#else
+ struct dev_mc_list *ha;
+#endif
+ netdev_for_each_uc_addr(ha, netdev) {
+#ifdef NETDEV_HW_ADDR_T_UNICAST
+ ixgbe_del_mac_filter(adapter, ha->addr, (u16)vfn);
+ ixgbe_add_mac_filter(adapter, ha->addr, (u16)vfn);
+#else
+ ixgbe_del_mac_filter(adapter, ha->da_addr, (u16)vfn);
+ ixgbe_add_mac_filter(adapter, ha->da_addr, (u16)vfn);
+#endif
+ count++;
+ }
+ }
+ return count;
+}
+
+#endif
+/**
+ * ixgbe_set_rx_mode - Unicast, Multicast and Promiscuous mode set
+ * @netdev: network interface device structure
+ *
+ * The set_rx_method entry point is called whenever the unicast/multicast
+ * address list or the network interface flags are updated. This routine is
+ * responsible for configuring the hardware for proper unicast, multicast and
+ * promiscuous mode.
+ **/
+void ixgbe_set_rx_mode(struct net_device *netdev)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_hw *hw = &adapter->hw;
+ u32 fctrl, vmolr = IXGBE_VMOLR_BAM | IXGBE_VMOLR_AUPE;
+ u32 vlnctrl;
+ int count;
+
+ /* Check for Promiscuous and All Multicast modes */
+ fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
+ vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
+
+ /* set all bits that we expect to always be set */
+ fctrl |= IXGBE_FCTRL_BAM;
+ fctrl |= IXGBE_FCTRL_DPF; /* discard pause frames when FC enabled */
+ fctrl |= IXGBE_FCTRL_PMCF;
+
+ /* clear the bits we are changing the status of */
+ fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
+ vlnctrl &= ~(IXGBE_VLNCTRL_VFE | IXGBE_VLNCTRL_CFIEN);
+
+ if (netdev->flags & IFF_PROMISC) {
+ hw->addr_ctrl.user_set_promisc = true;
+ fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
+ vmolr |= IXGBE_VMOLR_MPE;
+ } else {
+ if (netdev->flags & IFF_ALLMULTI) {
+ fctrl |= IXGBE_FCTRL_MPE;
+ vmolr |= IXGBE_VMOLR_MPE;
+ } else {
+ /*
+ * Write addresses to the MTA, if the attempt fails
+ * then we should just turn on promiscuous mode so
+ * that we can at least receive multicast traffic
+ */
+ count = ixgbe_write_mc_addr_list(netdev);
+ if (count < 0) {
+ fctrl |= IXGBE_FCTRL_MPE;
+ vmolr |= IXGBE_VMOLR_MPE;
+ } else if (count) {
+ vmolr |= IXGBE_VMOLR_ROMPE;
+ }
+ }
+#ifdef NETIF_F_HW_VLAN_TX
+ /* enable hardware vlan filtering */
+ vlnctrl |= IXGBE_VLNCTRL_VFE;
+#endif
+ hw->addr_ctrl.user_set_promisc = false;
+#ifdef HAVE_SET_RX_MODE
+ /*
+ * Write addresses to available RAR registers, if there is not
+ * sufficient space to store all the addresses then enable
+ * unicast promiscuous mode
+ */
+ count = ixgbe_write_uc_addr_list(adapter, netdev,
+ adapter->num_vfs);
+ if (count < 0) {
+ fctrl |= IXGBE_FCTRL_UPE;
+ vmolr |= IXGBE_VMOLR_ROPE;
+ }
+#endif
+ }
+
+ if (hw->mac.type != ixgbe_mac_82598EB) {
+ vmolr |= IXGBE_READ_REG(hw, IXGBE_VMOLR(adapter->num_vfs)) &
+ ~(IXGBE_VMOLR_MPE | IXGBE_VMOLR_ROMPE |
+ IXGBE_VMOLR_ROPE);
+ IXGBE_WRITE_REG(hw, IXGBE_VMOLR(adapter->num_vfs), vmolr);
+ }
+
+ IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
+ IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
+}
+
+
+
+
+
+
+
+
+/* Additional bittime to account for IXGBE framing */
+#define IXGBE_ETH_FRAMING 20
+
+/*
+ * ixgbe_hpbthresh - calculate high water mark for flow control
+ *
+ * @adapter: board private structure to calculate for
+ * @pb - packet buffer to calculate
+ */
+static int ixgbe_hpbthresh(struct ixgbe_adapter *adapter, int pb)
+{
+ struct ixgbe_hw *hw = &adapter->hw;
+ struct net_device *dev = adapter->netdev;
+ int link, tc, kb, marker;
+ u32 dv_id, rx_pba;
+
+ /* Calculate max LAN frame size */
+ tc = link = dev->mtu + ETH_HLEN + ETH_FCS_LEN + IXGBE_ETH_FRAMING;
+
+#ifdef IXGBE_FCOE
+ /* FCoE traffic class uses FCOE jumbo frames */
+ if (dev->features & NETIF_F_FCOE_MTU) {
+ int fcoe_pb = 0;
+
+ fcoe_pb = netdev_get_prio_tc_map(dev, adapter->fcoe.up);
+
+ if (fcoe_pb == pb && tc < IXGBE_FCOE_JUMBO_FRAME_SIZE)
+ tc = IXGBE_FCOE_JUMBO_FRAME_SIZE;
+ }
+#endif
+
+ /* Calculate delay value for device */
+ switch (hw->mac.type) {
+ case ixgbe_mac_X540:
+ dv_id = IXGBE_DV_X540(link, tc);
+ break;
+ default:
+ dv_id = IXGBE_DV(link, tc);
+ break;
+ }
+
+ /* Loopback switch introduces additional latency */
+ if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)
+ dv_id += IXGBE_B2BT(tc);
+
+ /* Delay value is calculated in bit times convert to KB */
+ kb = IXGBE_BT2KB(dv_id);
+ rx_pba = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(pb)) >> 10;
+
+ marker = rx_pba - kb;
+
+ /* It is possible that the packet buffer is not large enough
+ * to provide required headroom. In this case throw an error
+ * to user and a do the best we can.
+ */
+ if (marker < 0) {
+ e_warn(drv, "Packet Buffer(%i) can not provide enough"
+ "headroom to suppport flow control."
+ "Decrease MTU or number of traffic classes\n", pb);
+ marker = tc + 1;
+ }
+
+ return marker;
+}
+
+/*
+ * ixgbe_lpbthresh - calculate low water mark for for flow control
+ *
+ * @adapter: board private structure to calculate for
+ * @pb - packet buffer to calculate
+ */
+static int ixgbe_lpbthresh(struct ixgbe_adapter *adapter, int pb)
+{
+ struct ixgbe_hw *hw = &adapter->hw;
+ struct net_device *dev = adapter->netdev;
+ int tc;
+ u32 dv_id;
+
+ /* Calculate max LAN frame size */
+ tc = dev->mtu + ETH_HLEN + ETH_FCS_LEN;
+
+#ifdef IXGBE_FCOE
+ /* FCoE traffic class uses FCOE jumbo frames */
+ if (dev->features & NETIF_F_FCOE_MTU) {
+ int fcoe_pb = 0;
+
+ fcoe_pb = netdev_get_prio_tc_map(dev, adapter->fcoe.up);
+
+ if (fcoe_pb == pb && tc < IXGBE_FCOE_JUMBO_FRAME_SIZE)
+ tc = IXGBE_FCOE_JUMBO_FRAME_SIZE;
+ }
+#endif
+
+ /* Calculate delay value for device */
+ switch (hw->mac.type) {
+ case ixgbe_mac_X540:
+ dv_id = IXGBE_LOW_DV_X540(tc);
+ break;
+ default:
+ dv_id = IXGBE_LOW_DV(tc);
+ break;
+ }
+
+ /* Delay value is calculated in bit times convert to KB */
+ return IXGBE_BT2KB(dv_id);
+}
+
+/*
+ * ixgbe_pbthresh_setup - calculate and setup high low water marks
+ */
+static void ixgbe_pbthresh_setup(struct ixgbe_adapter *adapter)
+{
+ struct ixgbe_hw *hw = &adapter->hw;
+ int num_tc = netdev_get_num_tc(adapter->netdev);
+ int i;
+
+ if (!num_tc)
+ num_tc = 1;
+ if (num_tc > IXGBE_DCB_MAX_TRAFFIC_CLASS)
+ num_tc = IXGBE_DCB_MAX_TRAFFIC_CLASS;
+
+ for (i = 0; i < num_tc; i++) {
+ hw->fc.high_water[i] = ixgbe_hpbthresh(adapter, i);
+ hw->fc.low_water[i] = ixgbe_lpbthresh(adapter, i);
+
+ /* Low water marks must not be larger than high water marks */
+ if (hw->fc.low_water[i] > hw->fc.high_water[i])
+ hw->fc.low_water[i] = 0;
+ }
+
+ for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++)
+ hw->fc.high_water[i] = 0;
+}
+
+
+
+#ifdef NO_VNIC
+static void ixgbe_configure(struct ixgbe_adapter *adapter)
+{
+ struct ixgbe_hw *hw = &adapter->hw;
+
+ ixgbe_configure_pb(adapter);
+ ixgbe_configure_dcb(adapter);
+
+ ixgbe_set_rx_mode(adapter->netdev);
+#ifdef NETIF_F_HW_VLAN_TX
+ ixgbe_restore_vlan(adapter);
+#endif
+
+#ifdef IXGBE_FCOE
+ if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED)
+ ixgbe_configure_fcoe(adapter);
+
+#endif /* IXGBE_FCOE */
+
+ if (adapter->hw.mac.type != ixgbe_mac_82598EB)
+ hw->mac.ops.disable_sec_rx_path(hw);
+
+ if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) {
+ ixgbe_init_fdir_signature_82599(&adapter->hw,
+ adapter->fdir_pballoc);
+ } else if (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE) {
+ ixgbe_init_fdir_perfect_82599(&adapter->hw,
+ adapter->fdir_pballoc);
+ ixgbe_fdir_filter_restore(adapter);
+ }
+
+ if (adapter->hw.mac.type != ixgbe_mac_82598EB)
+ hw->mac.ops.enable_sec_rx_path(hw);
+
+ ixgbe_configure_virtualization(adapter);
+
+ ixgbe_configure_tx(adapter);
+ ixgbe_configure_rx(adapter);
+}
+#endif
+
+static bool ixgbe_is_sfp(struct ixgbe_hw *hw)
+{
+ switch (hw->phy.type) {
+ case ixgbe_phy_sfp_avago:
+ case ixgbe_phy_sfp_ftl:
+ case ixgbe_phy_sfp_intel:
+ case ixgbe_phy_sfp_unknown:
+ case ixgbe_phy_sfp_passive_tyco:
+ case ixgbe_phy_sfp_passive_unknown:
+ case ixgbe_phy_sfp_active_unknown:
+ case ixgbe_phy_sfp_ftl_active:
+ return true;
+ case ixgbe_phy_nl:
+ if (hw->mac.type == ixgbe_mac_82598EB)
+ return true;
+ default:
+ return false;
+ }
+}
+
+
+/**
+ * ixgbe_clear_vf_stats_counters - Clear out VF stats after reset
+ * @adapter: board private structure
+ *
+ * On a reset we need to clear out the VF stats or accounting gets
+ * messed up because they're not clear on read.
+ **/
+void ixgbe_clear_vf_stats_counters(struct ixgbe_adapter *adapter)
+{
+ struct ixgbe_hw *hw = &adapter->hw;
+ int i;
+
+ for (i = 0; i < adapter->num_vfs; i++) {
+ adapter->vfinfo[i].last_vfstats.gprc =
+ IXGBE_READ_REG(hw, IXGBE_PVFGPRC(i));
+ adapter->vfinfo[i].saved_rst_vfstats.gprc +=
+ adapter->vfinfo[i].vfstats.gprc;
+ adapter->vfinfo[i].vfstats.gprc = 0;
+ adapter->vfinfo[i].last_vfstats.gptc =
+ IXGBE_READ_REG(hw, IXGBE_PVFGPTC(i));
+ adapter->vfinfo[i].saved_rst_vfstats.gptc +=
+ adapter->vfinfo[i].vfstats.gptc;
+ adapter->vfinfo[i].vfstats.gptc = 0;
+ adapter->vfinfo[i].last_vfstats.gorc =
+ IXGBE_READ_REG(hw, IXGBE_PVFGORC_LSB(i));
+ adapter->vfinfo[i].saved_rst_vfstats.gorc +=
+ adapter->vfinfo[i].vfstats.gorc;
+ adapter->vfinfo[i].vfstats.gorc = 0;
+ adapter->vfinfo[i].last_vfstats.gotc =
+ IXGBE_READ_REG(hw, IXGBE_PVFGOTC_LSB(i));
+ adapter->vfinfo[i].saved_rst_vfstats.gotc +=
+ adapter->vfinfo[i].vfstats.gotc;
+ adapter->vfinfo[i].vfstats.gotc = 0;
+ adapter->vfinfo[i].last_vfstats.mprc =
+ IXGBE_READ_REG(hw, IXGBE_PVFMPRC(i));
+ adapter->vfinfo[i].saved_rst_vfstats.mprc +=
+ adapter->vfinfo[i].vfstats.mprc;
+ adapter->vfinfo[i].vfstats.mprc = 0;
+ }
+}
+
+
+
+void ixgbe_reinit_locked(struct ixgbe_adapter *adapter)
+{
+#ifdef NO_VNIC
+ WARN_ON(in_interrupt());
+ /* put off any impending NetWatchDogTimeout */
+ adapter->netdev->trans_start = jiffies;
+
+ while (test_and_set_bit(__IXGBE_RESETTING, &adapter->state))
+ usleep_range(1000, 2000);
+ ixgbe_down(adapter);
+ /*
+ * If SR-IOV enabled then wait a bit before bringing the adapter
+ * back up to give the VFs time to respond to the reset. The
+ * two second wait is based upon the watchdog timer cycle in
+ * the VF driver.
+ */
+ if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)
+ msleep(2000);
+ ixgbe_up(adapter);
+ clear_bit(__IXGBE_RESETTING, &adapter->state);
+#endif
+}
+
+void ixgbe_up(struct ixgbe_adapter *adapter)
+{
+ /* hardware has been reset, we need to reload some things */
+ //ixgbe_configure(adapter);
+
+ //ixgbe_up_complete(adapter);
+}
+
+void ixgbe_reset(struct ixgbe_adapter *adapter)
+{
+ struct ixgbe_hw *hw = &adapter->hw;
+ struct net_device *netdev = adapter->netdev;
+ int err;
+
+ /* lock SFP init bit to prevent race conditions with the watchdog */
+ while (test_and_set_bit(__IXGBE_IN_SFP_INIT, &adapter->state))
+ usleep_range(1000, 2000);
+
+ /* clear all SFP and link config related flags while holding SFP_INIT */
+ adapter->flags2 &= ~(IXGBE_FLAG2_SEARCH_FOR_SFP |
+ IXGBE_FLAG2_SFP_NEEDS_RESET);
+ adapter->flags &= ~IXGBE_FLAG_NEED_LINK_CONFIG;
+
+ err = hw->mac.ops.init_hw(hw);
+ switch (err) {
+ case 0:
+ case IXGBE_ERR_SFP_NOT_PRESENT:
+ case IXGBE_ERR_SFP_NOT_SUPPORTED:
+ break;
+ case IXGBE_ERR_MASTER_REQUESTS_PENDING:
+ e_dev_err("master disable timed out\n");
+ break;
+ case IXGBE_ERR_EEPROM_VERSION:
+ /* We are running on a pre-production device, log a warning */
+ e_dev_warn("This device is a pre-production adapter/LOM. "
+ "Please be aware there may be issues associated "
+ "with your hardware. If you are experiencing "
+ "problems please contact your Intel or hardware "
+ "representative who provided you with this "
+ "hardware.\n");
+ break;
+ default:
+ e_dev_err("Hardware Error: %d\n", err);
+ }
+
+ clear_bit(__IXGBE_IN_SFP_INIT, &adapter->state);
+
+ ixgbe_flush_sw_mac_table(adapter);
+ memcpy(&adapter->mac_table[0].addr, hw->mac.perm_addr,
+ netdev->addr_len);
+ adapter->mac_table[0].queue = adapter->num_vfs;
+ adapter->mac_table[0].state = (IXGBE_MAC_STATE_DEFAULT |
+ IXGBE_MAC_STATE_IN_USE);
+ hw->mac.ops.set_rar(hw, 0, adapter->mac_table[0].addr,
+ adapter->mac_table[0].queue,
+ IXGBE_RAH_AV);
+}
+
+
+
+
+
+
+void ixgbe_down(struct ixgbe_adapter *adapter)
+{
+#ifdef NO_VNIC
+ struct net_device *netdev = adapter->netdev;
+ struct ixgbe_hw *hw = &adapter->hw;
+ u32 rxctrl;
+ int i;
+
+ /* signal that we are down to the interrupt handler */
+ set_bit(__IXGBE_DOWN, &adapter->state);
+
+ /* disable receives */
+ rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
+ IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
+
+ /* disable all enabled rx queues */
+ for (i = 0; i < adapter->num_rx_queues; i++)
+ /* this call also flushes the previous write */
+ ixgbe_disable_rx_queue(adapter, adapter->rx_ring[i]);
+
+ usleep_range(10000, 20000);
+
+ netif_tx_stop_all_queues(netdev);
+
+ /* call carrier off first to avoid false dev_watchdog timeouts */
+ netif_carrier_off(netdev);
+ netif_tx_disable(netdev);
+
+ ixgbe_irq_disable(adapter);
+
+ ixgbe_napi_disable_all(adapter);
+
+ adapter->flags2 &= ~(IXGBE_FLAG2_FDIR_REQUIRES_REINIT |
+ IXGBE_FLAG2_RESET_REQUESTED);
+ adapter->flags &= ~IXGBE_FLAG_NEED_LINK_UPDATE;
+
+ del_timer_sync(&adapter->service_timer);
+
+ if (adapter->num_vfs) {
+ /* Clear EITR Select mapping */
+ IXGBE_WRITE_REG(&adapter->hw, IXGBE_EITRSEL, 0);
+
+ /* Mark all the VFs as inactive */
+ for (i = 0 ; i < adapter->num_vfs; i++)
+ adapter->vfinfo[i].clear_to_send = 0;
+
+ /* ping all the active vfs to let them know we are going down */
+ ixgbe_ping_all_vfs(adapter);
+
+ /* Disable all VFTE/VFRE TX/RX */
+ ixgbe_disable_tx_rx(adapter);
+ }
+
+ /* disable transmits in the hardware now that interrupts are off */
+ for (i = 0; i < adapter->num_tx_queues; i++) {
+ u8 reg_idx = adapter->tx_ring[i]->reg_idx;
+ IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(reg_idx), IXGBE_TXDCTL_SWFLSH);
+ }
+
+ /* Disable the Tx DMA engine on 82599 and X540 */
+ switch (hw->mac.type) {
+ case ixgbe_mac_82599EB:
+ case ixgbe_mac_X540:
+ IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL,
+ (IXGBE_READ_REG(hw, IXGBE_DMATXCTL) &
+ ~IXGBE_DMATXCTL_TE));
+ break;
+ default:
+ break;
+ }
+
+#ifdef HAVE_PCI_ERS
+ if (!pci_channel_offline(adapter->pdev))
+#endif
+ ixgbe_reset(adapter);
+ /* power down the optics */
+ if ((hw->phy.multispeed_fiber) ||
+ ((hw->mac.ops.get_media_type(hw) == ixgbe_media_type_fiber) &&
+ (hw->mac.type == ixgbe_mac_82599EB)))
+ ixgbe_disable_tx_laser(hw);
+
+ ixgbe_clean_all_tx_rings(adapter);
+ ixgbe_clean_all_rx_rings(adapter);
+
+#if defined(CONFIG_DCA) || defined(CONFIG_DCA_MODULE)
+ /* since we reset the hardware DCA settings were cleared */
+ ixgbe_setup_dca(adapter);
+#endif
+
+#endif /* NO_VNIC */
+}
+
+#ifndef NO_VNIC
+
+#undef IXGBE_FCOE
+
+/* Artificial max queue cap per traffic class in DCB mode */
+#define DCB_QUEUE_CAP 8
+
+/**
+ * ixgbe_set_dcb_queues: Allocate queues for a DCB-enabled device
+ * @adapter: board private structure to initialize
+ *
+ * When DCB (Data Center Bridging) is enabled, allocate queues for
+ * each traffic class. If multiqueue isn't available,then abort DCB
+ * initialization.
+ *
+ * This function handles all combinations of DCB, RSS, and FCoE.
+ *
+ **/
+static bool ixgbe_set_dcb_queues(struct ixgbe_adapter *adapter)
+{
+ int tcs;
+#ifdef HAVE_MQPRIO
+ int rss_i, i, offset = 0;
+ struct net_device *dev = adapter->netdev;
+
+ /* Map queue offset and counts onto allocated tx queues */
+ tcs = netdev_get_num_tc(dev);
+
+ if (!tcs)
+ return false;
+
+ rss_i = min_t(int, dev->num_tx_queues / tcs, num_online_cpus());
+
+ if (rss_i > DCB_QUEUE_CAP)
+ rss_i = DCB_QUEUE_CAP;
+
+ for (i = 0; i < tcs; i++) {
+ netdev_set_tc_queue(dev, i, rss_i, offset);
+ offset += rss_i;
+ }
+
+ adapter->num_tx_queues = rss_i * tcs;
+ adapter->num_rx_queues = rss_i * tcs;
+
+#ifdef IXGBE_FCOE
+ /* FCoE enabled queues require special configuration indexed
+ * by feature specific indices and mask. Here we map FCoE
+ * indices onto the DCB queue pairs allowing FCoE to own
+ * configuration later.
+ */
+
+ if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) {
+ struct ixgbe_ring_feature *f;
+ int tc;
+ u8 prio_tc[IXGBE_DCB_MAX_USER_PRIORITY] = {0};
+
+ ixgbe_dcb_unpack_map_cee(&adapter->dcb_cfg,
+ IXGBE_DCB_TX_CONFIG,
+ prio_tc);
+ tc = prio_tc[adapter->fcoe.up];
+
+ f = &adapter->ring_feature[RING_F_FCOE];
+ f->indices = min_t(int, rss_i, f->indices);
+ f->mask = rss_i * tc;
+ }
+#endif /* IXGBE_FCOE */
+#else
+ if (!(adapter->flags & IXGBE_FLAG_DCB_ENABLED))
+ return false;
+
+ /* Enable one Queue per traffic class */
+ tcs = adapter->tc;
+ if (!tcs)
+ return false;
+
+#ifdef IXGBE_FCOE
+ if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) {
+ struct ixgbe_ring_feature *f;
+ int tc = netdev_get_prio_tc_map(adapter->netdev,
+ adapter->fcoe.up);
+
+ f = &adapter->ring_feature[RING_F_FCOE];
+
+ /*
+ * We have max 8 queues for FCoE, where 8 the is
+ * FCoE redirection table size. We must also share
+ * ring resources with network traffic so if FCoE TC is
+ * 4 or greater and we are in 8 TC mode we can only use
+ * 7 queues.
+ */
+ if ((tcs > 4) && (tc >= 4) && (f->indices > 7))
+ f->indices = 7;
+
+ f->indices = min_t(int, num_online_cpus(), f->indices);
+ f->mask = tcs;
+
+ adapter->num_rx_queues = f->indices + tcs;
+ adapter->num_tx_queues = f->indices + tcs;
+
+ return true;
+ }
+
+#endif /* IXGBE_FCOE */
+ adapter->num_rx_queues = tcs;
+ adapter->num_tx_queues = tcs;
+#endif /* HAVE_MQ */
+
+ return true;
+}
+
+/**
+ * ixgbe_set_vmdq_queues: Allocate queues for VMDq devices
+ * @adapter: board private structure to initialize
+ *
+ * When VMDq (Virtual Machine Devices queue) is enabled, allocate queues
+ * and VM pools where appropriate. If RSS is available, then also try and
+ * enable RSS and map accordingly.
+ *
+ **/
+static bool ixgbe_set_vmdq_queues(struct ixgbe_adapter *adapter)
+{
+ int vmdq_i = adapter->ring_feature[RING_F_VMDQ].indices;
+ int vmdq_m = 0;
+ int rss_i = adapter->ring_feature[RING_F_RSS].indices;
+ unsigned long i;
+ int rss_shift;
+ bool ret = false;
+
+
+ switch (adapter->flags & (IXGBE_FLAG_RSS_ENABLED
+ | IXGBE_FLAG_DCB_ENABLED
+ | IXGBE_FLAG_VMDQ_ENABLED)) {
+
+ case (IXGBE_FLAG_RSS_ENABLED | IXGBE_FLAG_VMDQ_ENABLED):
+ switch (adapter->hw.mac.type) {
+ case ixgbe_mac_82599EB:
+ case ixgbe_mac_X540:
+ vmdq_i = min((int)IXGBE_MAX_VMDQ_INDICES, vmdq_i);
+ if (vmdq_i > 32)
+ rss_i = 2;
+ else
+ rss_i = 4;
+ i = rss_i;
+ rss_shift = find_first_bit(&i, sizeof(i) * 8);
+ vmdq_m = ((IXGBE_MAX_VMDQ_INDICES - 1) <<
+ rss_shift) & (MAX_RX_QUEUES - 1);
+ break;
+ default:
+ break;
+ }
+ adapter->num_rx_queues = vmdq_i * rss_i;
+ adapter->num_tx_queues = min((int)MAX_TX_QUEUES, vmdq_i * rss_i);
+ ret = true;
+ break;
+
+ case (IXGBE_FLAG_VMDQ_ENABLED):
+ switch (adapter->hw.mac.type) {
+ case ixgbe_mac_82598EB:
+ vmdq_m = (IXGBE_MAX_VMDQ_INDICES - 1);
+ break;
+ case ixgbe_mac_82599EB:
+ case ixgbe_mac_X540:
+ vmdq_m = (IXGBE_MAX_VMDQ_INDICES - 1) << 1;
+ break;
+ default:
+ break;
+ }
+ adapter->num_rx_queues = vmdq_i;
+ adapter->num_tx_queues = vmdq_i;
+ ret = true;
+ break;
+
+ default:
+ ret = false;
+ goto vmdq_queues_out;
+ }
+
+ if (adapter->flags & IXGBE_FLAG_VMDQ_ENABLED) {
+ adapter->num_rx_pools = vmdq_i;
+ adapter->num_rx_queues_per_pool = adapter->num_rx_queues /
+ vmdq_i;
+ } else {
+ adapter->num_rx_pools = adapter->num_rx_queues;
+ adapter->num_rx_queues_per_pool = 1;
+ }
+ /* save the mask for later use */
+ adapter->ring_feature[RING_F_VMDQ].mask = vmdq_m;
+vmdq_queues_out:
+ return ret;
+}
+
+/**
+ * ixgbe_set_rss_queues: Allocate queues for RSS
+ * @adapter: board private structure to initialize
+ *
+ * This is our "base" multiqueue mode. RSS (Receive Side Scaling) will try
+ * to allocate one Rx queue per CPU, and if available, one Tx queue per CPU.
+ *
+ **/
+static bool ixgbe_set_rss_queues(struct ixgbe_adapter *adapter)
+{
+ struct ixgbe_ring_feature *f;
+
+ if (!(adapter->flags & IXGBE_FLAG_RSS_ENABLED)) {
+ adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;
+ return false;
+ }
+
+ /* set mask for 16 queue limit of RSS */
+ f = &adapter->ring_feature[RING_F_RSS];
+ f->mask = 0xF;
+
+ /*
+ * Use Flow Director in addition to RSS to ensure the best
+ * distribution of flows across cores, even when an FDIR flow
+ * isn't matched.
+ */
+ if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) {
+ f = &adapter->ring_feature[RING_F_FDIR];
+
+ f->indices = min_t(int, num_online_cpus(), f->indices);
+ f->mask = 0;
+ }
+
+ adapter->num_rx_queues = f->indices;
+#ifdef HAVE_TX_MQ
+ adapter->num_tx_queues = f->indices;
+#endif
+
+ return true;
+}
+
+#ifdef IXGBE_FCOE
+/**
+ * ixgbe_set_fcoe_queues: Allocate queues for Fiber Channel over Ethernet (FCoE)
+ * @adapter: board private structure to initialize
+ *
+ * FCoE RX FCRETA can use up to 8 rx queues for up to 8 different exchanges.
+ * The ring feature mask is not used as a mask for FCoE, as it can take any 8
+ * rx queues out of the max number of rx queues, instead, it is used as the
+ * index of the first rx queue used by FCoE.
+ *
+ **/
+static bool ixgbe_set_fcoe_queues(struct ixgbe_adapter *adapter)
+{
+ struct ixgbe_ring_feature *f;
+
+ if (!(adapter->flags & IXGBE_FLAG_FCOE_ENABLED))
+ return false;
+
+ ixgbe_set_rss_queues(adapter);
+
+ f = &adapter->ring_feature[RING_F_FCOE];
+ f->indices = min_t(int, num_online_cpus(), f->indices);
+
+ /* adding FCoE queues */
+ f->mask = adapter->num_rx_queues;
+ adapter->num_rx_queues += f->indices;
+ adapter->num_tx_queues += f->indices;
+
+ return true;
+}
+
+#endif /* IXGBE_FCOE */
+/*
+ * ixgbe_set_num_queues: Allocate queues for device, feature dependent
+ * @adapter: board private structure to initialize
+ *
+ * This is the top level queue allocation routine. The order here is very
+ * important, starting with the "most" number of features turned on at once,
+ * and ending with the smallest set of features. This way large combinations
+ * can be allocated if they're turned on, and smaller combinations are the
+ * fallthrough conditions.
+ *
+ **/
+static void ixgbe_set_num_queues(struct ixgbe_adapter *adapter)
+{
+ /* Start with base case */
+ adapter->num_rx_queues = 1;
+ adapter->num_tx_queues = 1;
+ adapter->num_rx_pools = adapter->num_rx_queues;
+ adapter->num_rx_queues_per_pool = 1;
+
+ if (ixgbe_set_vmdq_queues(adapter))
+ return;
+
+ if (ixgbe_set_dcb_queues(adapter))
+ return;
+
+#ifdef IXGBE_FCOE
+ if (ixgbe_set_fcoe_queues(adapter))
+ return;
+
+#endif /* IXGBE_FCOE */
+ ixgbe_set_rss_queues(adapter);
+}
+
+#endif
+
+
+/**
+ * ixgbe_sw_init - Initialize general software structures (struct ixgbe_adapter)
+ * @adapter: board private structure to initialize
+ *
+ * ixgbe_sw_init initializes the Adapter private data structure.
+ * Fields are initialized based on PCI device information and
+ * OS network device settings (MTU size).
+ **/
+static int ixgbe_sw_init(struct ixgbe_adapter *adapter)
+{
+ struct ixgbe_hw *hw = &adapter->hw;
+ struct pci_dev *pdev = adapter->pdev;
+ int err;
+
+ /* PCI config space info */
+
+ hw->vendor_id = pdev->vendor;
+ hw->device_id = pdev->device;
+ pci_read_config_byte(pdev, PCI_REVISION_ID, &hw->revision_id);
+ hw->subsystem_vendor_id = pdev->subsystem_vendor;
+ hw->subsystem_device_id = pdev->subsystem_device;
+
+ err = ixgbe_init_shared_code(hw);
+ if (err) {
+ e_err(probe, "init_shared_code failed: %d\n", err);
+ goto out;
+ }
+ adapter->mac_table = kzalloc(sizeof(struct ixgbe_mac_addr) *
+ hw->mac.num_rar_entries,
+ GFP_ATOMIC);
+ /* Set capability flags */
+ switch (hw->mac.type) {
+ case ixgbe_mac_82598EB:
+ adapter->flags |= IXGBE_FLAG_MSI_CAPABLE |
+ IXGBE_FLAG_MSIX_CAPABLE |
+ IXGBE_FLAG_MQ_CAPABLE |
+ IXGBE_FLAG_RSS_CAPABLE;
+ adapter->flags |= IXGBE_FLAG_DCB_CAPABLE;
+#if defined(CONFIG_DCA) || defined(CONFIG_DCA_MODULE)
+ adapter->flags |= IXGBE_FLAG_DCA_CAPABLE;
+#endif
+ adapter->flags &= ~IXGBE_FLAG_SRIOV_CAPABLE;
+ adapter->flags2 &= ~IXGBE_FLAG2_RSC_CAPABLE;
+
+ if (hw->device_id == IXGBE_DEV_ID_82598AT)
+ adapter->flags |= IXGBE_FLAG_FAN_FAIL_CAPABLE;
+
+ adapter->max_msix_q_vectors = IXGBE_MAX_MSIX_Q_VECTORS_82598;
+ break;
+ case ixgbe_mac_X540:
+ adapter->flags2 |= IXGBE_FLAG2_TEMP_SENSOR_CAPABLE;
+ case ixgbe_mac_82599EB:
+ adapter->flags |= IXGBE_FLAG_MSI_CAPABLE |
+ IXGBE_FLAG_MSIX_CAPABLE |
+ IXGBE_FLAG_MQ_CAPABLE |
+ IXGBE_FLAG_RSS_CAPABLE;
+ adapter->flags |= IXGBE_FLAG_DCB_CAPABLE;
+#if defined(CONFIG_DCA) || defined(CONFIG_DCA_MODULE)
+ adapter->flags |= IXGBE_FLAG_DCA_CAPABLE;
+#endif
+ adapter->flags |= IXGBE_FLAG_SRIOV_CAPABLE;
+ adapter->flags2 |= IXGBE_FLAG2_RSC_CAPABLE;
+#ifdef IXGBE_FCOE
+ adapter->flags |= IXGBE_FLAG_FCOE_CAPABLE;
+ adapter->flags &= ~IXGBE_FLAG_FCOE_ENABLED;
+ adapter->ring_feature[RING_F_FCOE].indices = 0;
+#ifdef CONFIG_DCB
+ /* Default traffic class to use for FCoE */
+ adapter->fcoe.tc = IXGBE_FCOE_DEFTC;
+ adapter->fcoe.up = IXGBE_FCOE_DEFTC;
+ adapter->fcoe.up_set = IXGBE_FCOE_DEFTC;
+#endif
+#endif
+ if (hw->device_id == IXGBE_DEV_ID_82599_T3_LOM)
+ adapter->flags2 |= IXGBE_FLAG2_TEMP_SENSOR_CAPABLE;
+#ifndef IXGBE_NO_SMART_SPEED
+ hw->phy.smart_speed = ixgbe_smart_speed_on;
+#else
+ hw->phy.smart_speed = ixgbe_smart_speed_off;
+#endif
+ adapter->max_msix_q_vectors = IXGBE_MAX_MSIX_Q_VECTORS_82599;
+ default:
+ break;
+ }
+
+ /* n-tuple support exists, always init our spinlock */
+ //spin_lock_init(&adapter->fdir_perfect_lock);
+
+ if (adapter->flags & IXGBE_FLAG_DCB_CAPABLE) {
+ int j;
+ struct ixgbe_dcb_tc_config *tc;
+ int dcb_i = IXGBE_DCB_MAX_TRAFFIC_CLASS;
+
+
+ adapter->dcb_cfg.num_tcs.pg_tcs = dcb_i;
+ adapter->dcb_cfg.num_tcs.pfc_tcs = dcb_i;
+ for (j = 0; j < dcb_i; j++) {
+ tc = &adapter->dcb_cfg.tc_config[j];
+ tc->path[IXGBE_DCB_TX_CONFIG].bwg_id = 0;
+ tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 100 / dcb_i;
+ tc->path[IXGBE_DCB_RX_CONFIG].bwg_id = 0;
+ tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 100 / dcb_i;
+ tc->pfc = ixgbe_dcb_pfc_disabled;
+ if (j == 0) {
+ /* total of all TCs bandwidth needs to be 100 */
+ tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent +=
+ 100 % dcb_i;
+ tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent +=
+ 100 % dcb_i;
+ }
+ }
+
+ /* Initialize default user to priority mapping, UPx->TC0 */
+ tc = &adapter->dcb_cfg.tc_config[0];
+ tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0xFF;
+ tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0xFF;
+
+ adapter->dcb_cfg.bw_percentage[IXGBE_DCB_TX_CONFIG][0] = 100;
+ adapter->dcb_cfg.bw_percentage[IXGBE_DCB_RX_CONFIG][0] = 100;
+ adapter->dcb_cfg.rx_pba_cfg = ixgbe_dcb_pba_equal;
+ adapter->dcb_cfg.pfc_mode_enable = false;
+ adapter->dcb_cfg.round_robin_enable = false;
+ adapter->dcb_set_bitmap = 0x00;
+#ifdef CONFIG_DCB
+ adapter->dcbx_cap = DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_CEE;
+#endif /* CONFIG_DCB */
+
+ if (hw->mac.type == ixgbe_mac_X540) {
+ adapter->dcb_cfg.num_tcs.pg_tcs = 4;
+ adapter->dcb_cfg.num_tcs.pfc_tcs = 4;
+ }
+ }
+#ifdef CONFIG_DCB
+ /* XXX does this need to be initialized even w/o DCB? */
+ //memcpy(&adapter->temp_dcb_cfg, &adapter->dcb_cfg,
+ // sizeof(adapter->temp_dcb_cfg));
+
+#endif
+ //if (hw->mac.type == ixgbe_mac_82599EB ||
+ // hw->mac.type == ixgbe_mac_X540)
+ // hw->mbx.ops.init_params(hw);
+
+ /* default flow control settings */
+ hw->fc.requested_mode = ixgbe_fc_full;
+ hw->fc.current_mode = ixgbe_fc_full; /* init for ethtool output */
+
+ adapter->last_lfc_mode = hw->fc.current_mode;
+ ixgbe_pbthresh_setup(adapter);
+ hw->fc.pause_time = IXGBE_DEFAULT_FCPAUSE;
+ hw->fc.send_xon = true;
+ hw->fc.disable_fc_autoneg = false;
+
+ /* set default ring sizes */
+ adapter->tx_ring_count = IXGBE_DEFAULT_TXD;
+ adapter->rx_ring_count = IXGBE_DEFAULT_RXD;
+
+ /* set default work limits */
+ adapter->tx_work_limit = IXGBE_DEFAULT_TX_WORK;
+ adapter->rx_work_limit = IXGBE_DEFAULT_RX_WORK;
+
+ set_bit(__IXGBE_DOWN, &adapter->state);
+out:
+ return err;
+}
+
+/**
+ * ixgbe_setup_tx_resources - allocate Tx resources (Descriptors)
+ * @tx_ring: tx descriptor ring (for a specific queue) to setup
+ *
+ * Return 0 on success, negative on failure
+ **/
+int ixgbe_setup_tx_resources(struct ixgbe_ring *tx_ring)
+{
+ struct device *dev = tx_ring->dev;
+ //int orig_node = dev_to_node(dev);
+ int numa_node = -1;
+ int size;
+
+ size = sizeof(struct ixgbe_tx_buffer) * tx_ring->count;
+
+ if (tx_ring->q_vector)
+ numa_node = tx_ring->q_vector->numa_node;
+
+ tx_ring->tx_buffer_info = vzalloc_node(size, numa_node);
+ if (!tx_ring->tx_buffer_info)
+ tx_ring->tx_buffer_info = vzalloc(size);
+ if (!tx_ring->tx_buffer_info)
+ goto err;
+
+ /* round up to nearest 4K */
+ tx_ring->size = tx_ring->count * sizeof(union ixgbe_adv_tx_desc);
+ tx_ring->size = ALIGN(tx_ring->size, 4096);
+
+ //set_dev_node(dev, numa_node);
+ //tx_ring->desc = dma_alloc_coherent(dev,
+ // tx_ring->size,
+ // &tx_ring->dma,
+ // GFP_KERNEL);
+ //set_dev_node(dev, orig_node);
+ //if (!tx_ring->desc)
+ // tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
+ // &tx_ring->dma, GFP_KERNEL);
+ //if (!tx_ring->desc)
+ // goto err;
+
+ return 0;
+
+err:
+ vfree(tx_ring->tx_buffer_info);
+ tx_ring->tx_buffer_info = NULL;
+ dev_err(dev, "Unable to allocate memory for the Tx descriptor ring\n");
+ return -ENOMEM;
+}
+
+/**
+ * ixgbe_setup_all_tx_resources - allocate all queues Tx resources
+ * @adapter: board private structure
+ *
+ * If this function returns with an error, then it's possible one or
+ * more of the rings is populated (while the rest are not). It is the
+ * callers duty to clean those orphaned rings.
+ *
+ * Return 0 on success, negative on failure
+ **/
+static int ixgbe_setup_all_tx_resources(struct ixgbe_adapter *adapter)
+{
+ int i, err = 0;
+
+ for (i = 0; i < adapter->num_tx_queues; i++) {
+ err = ixgbe_setup_tx_resources(adapter->tx_ring[i]);
+ if (!err)
+ continue;
+ e_err(probe, "Allocation for Tx Queue %u failed\n", i);
+ break;
+ }
+
+ return err;
+}
+
+/**
+ * ixgbe_setup_rx_resources - allocate Rx resources (Descriptors)
+ * @rx_ring: rx descriptor ring (for a specific queue) to setup
+ *
+ * Returns 0 on success, negative on failure
+ **/
+int ixgbe_setup_rx_resources(struct ixgbe_ring *rx_ring)
+{
+ struct device *dev = rx_ring->dev;
+ //int orig_node = dev_to_node(dev);
+ int numa_node = -1;
+ int size;
+
+ size = sizeof(struct ixgbe_rx_buffer) * rx_ring->count;
+
+ if (rx_ring->q_vector)
+ numa_node = rx_ring->q_vector->numa_node;
+
+ rx_ring->rx_buffer_info = vzalloc_node(size, numa_node);
+ if (!rx_ring->rx_buffer_info)
+ rx_ring->rx_buffer_info = vzalloc(size);
+ if (!rx_ring->rx_buffer_info)
+ goto err;
+
+ /* Round up to nearest 4K */
+ rx_ring->size = rx_ring->count * sizeof(union ixgbe_adv_rx_desc);
+ rx_ring->size = ALIGN(rx_ring->size, 4096);
+
+#ifdef NO_VNIC
+ set_dev_node(dev, numa_node);
+ rx_ring->desc = dma_alloc_coherent(dev,
+ rx_ring->size,
+ &rx_ring->dma,
+ GFP_KERNEL);
+ set_dev_node(dev, orig_node);
+ if (!rx_ring->desc)
+ rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
+ &rx_ring->dma, GFP_KERNEL);
+ if (!rx_ring->desc)
+ goto err;
+
+#ifndef CONFIG_IXGBE_DISABLE_PACKET_SPLIT
+ ixgbe_init_rx_page_offset(rx_ring);
+
+#endif
+
+#endif /* NO_VNIC */
+ return 0;
+err:
+ vfree(rx_ring->rx_buffer_info);
+ rx_ring->rx_buffer_info = NULL;
+ dev_err(dev, "Unable to allocate memory for the Rx descriptor ring\n");
+ return -ENOMEM;
+}
+
+/**
+ * ixgbe_setup_all_rx_resources - allocate all queues Rx resources
+ * @adapter: board private structure
+ *
+ * If this function returns with an error, then it's possible one or
+ * more of the rings is populated (while the rest are not). It is the
+ * callers duty to clean those orphaned rings.
+ *
+ * Return 0 on success, negative on failure
+ **/
+static int ixgbe_setup_all_rx_resources(struct ixgbe_adapter *adapter)
+{
+ int i, err = 0;
+
+ for (i = 0; i < adapter->num_rx_queues; i++) {
+ err = ixgbe_setup_rx_resources(adapter->rx_ring[i]);
+ if (!err)
+ continue;
+ e_err(probe, "Allocation for Rx Queue %u failed\n", i);
+ break;
+ }
+
+ return err;
+}
+
+/**
+ * ixgbe_free_tx_resources - Free Tx Resources per Queue
+ * @tx_ring: Tx descriptor ring for a specific queue
+ *
+ * Free all transmit software resources
+ **/
+void ixgbe_free_tx_resources(struct ixgbe_ring *tx_ring)
+{
+ //ixgbe_clean_tx_ring(tx_ring);
+
+ vfree(tx_ring->tx_buffer_info);
+ tx_ring->tx_buffer_info = NULL;
+
+ /* if not set, then don't free */
+ if (!tx_ring->desc)
+ return;
+
+ //dma_free_coherent(tx_ring->dev, tx_ring->size,
+ // tx_ring->desc, tx_ring->dma);
+
+ tx_ring->desc = NULL;
+}
+
+/**
+ * ixgbe_free_all_tx_resources - Free Tx Resources for All Queues
+ * @adapter: board private structure
+ *
+ * Free all transmit software resources
+ **/
+static void ixgbe_free_all_tx_resources(struct ixgbe_adapter *adapter)
+{
+ int i;
+
+ for (i = 0; i < adapter->num_tx_queues; i++)
+ if (adapter->tx_ring[i]->desc)
+ ixgbe_free_tx_resources(adapter->tx_ring[i]);
+}
+
+/**
+ * ixgbe_free_rx_resources - Free Rx Resources
+ * @rx_ring: ring to clean the resources from
+ *
+ * Free all receive software resources
+ **/
+void ixgbe_free_rx_resources(struct ixgbe_ring *rx_ring)
+{
+ //ixgbe_clean_rx_ring(rx_ring);
+
+ vfree(rx_ring->rx_buffer_info);
+ rx_ring->rx_buffer_info = NULL;
+
+ /* if not set, then don't free */
+ if (!rx_ring->desc)
+ return;
+
+ //dma_free_coherent(rx_ring->dev, rx_ring->size,
+ // rx_ring->desc, rx_ring->dma);
+
+ rx_ring->desc = NULL;
+}
+
+/**
+ * ixgbe_free_all_rx_resources - Free Rx Resources for All Queues
+ * @adapter: board private structure
+ *
+ * Free all receive software resources
+ **/
+static void ixgbe_free_all_rx_resources(struct ixgbe_adapter *adapter)
+{
+ int i;
+
+ for (i = 0; i < adapter->num_rx_queues; i++)
+ if (adapter->rx_ring[i]->desc)
+ ixgbe_free_rx_resources(adapter->rx_ring[i]);
+}
+
+
+/**
+ * ixgbe_open - Called when a network interface is made active
+ * @netdev: network interface device structure
+ *
+ * Returns 0 on success, negative value on failure
+ *
+ * The open entry point is called when a network interface is made
+ * active by the system (IFF_UP). At this point all resources needed
+ * for transmit and receive operations are allocated, the interrupt
+ * handler is registered with the OS, the watchdog timer is started,
+ * and the stack is notified that the interface is ready.
+ **/
+//static
+int ixgbe_open(struct net_device *netdev)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ int err;
+
+ /* disallow open during test */
+ if (test_bit(__IXGBE_TESTING, &adapter->state))
+ return -EBUSY;
+
+ netif_carrier_off(netdev);
+
+ /* allocate transmit descriptors */
+ err = ixgbe_setup_all_tx_resources(adapter);
+ if (err)
+ goto err_setup_tx;
+
+ /* allocate receive descriptors */
+ err = ixgbe_setup_all_rx_resources(adapter);
+ if (err)
+ goto err_setup_rx;
+
+#ifdef NO_VNIC
+ ixgbe_configure(adapter);
+
+ err = ixgbe_request_irq(adapter);
+ if (err)
+ goto err_req_irq;
+
+ ixgbe_up_complete(adapter);
+
+err_req_irq:
+#else
+ return 0;
+#endif
+err_setup_rx:
+ ixgbe_free_all_rx_resources(adapter);
+err_setup_tx:
+ ixgbe_free_all_tx_resources(adapter);
+ ixgbe_reset(adapter);
+
+ return err;
+}
+
+/**
+ * ixgbe_close - Disables a network interface
+ * @netdev: network interface device structure
+ *
+ * Returns 0, this is not allowed to fail
+ *
+ * The close entry point is called when an interface is de-activated
+ * by the OS. The hardware is still under the drivers control, but
+ * needs to be disabled. A global MAC reset is issued to stop the
+ * hardware, and all transmit and receive resources are freed.
+ **/
+//static
+int ixgbe_close(struct net_device *netdev)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+ //ixgbe_down(adapter);
+ //ixgbe_free_irq(adapter);
+
+ //ixgbe_fdir_filter_exit(adapter);
+
+ //ixgbe_free_all_tx_resources(adapter);
+ //ixgbe_free_all_rx_resources(adapter);
+
+ ixgbe_release_hw_control(adapter);
+
+ return 0;
+}
+
+
+
+
+
+/**
+ * ixgbe_get_stats - Get System Network Statistics
+ * @netdev: network interface device structure
+ *
+ * Returns the address of the device statistics structure.
+ * The statistics are actually updated from the timer callback.
+ **/
+//static
+struct net_device_stats *ixgbe_get_stats(struct net_device *netdev)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+ /* update the stats data */
+ ixgbe_update_stats(adapter);
+
+#ifdef HAVE_NETDEV_STATS_IN_NETDEV
+ /* only return the current stats */
+ return &netdev->stats;
+#else
+ /* only return the current stats */
+ return &adapter->net_stats;
+#endif /* HAVE_NETDEV_STATS_IN_NETDEV */
+}
+
+/**
+ * ixgbe_update_stats - Update the board statistics counters.
+ * @adapter: board private structure
+ **/
+void ixgbe_update_stats(struct ixgbe_adapter *adapter)
+{
+#ifdef HAVE_NETDEV_STATS_IN_NETDEV
+ struct net_device_stats *net_stats = &adapter->netdev->stats;
+#else
+ struct net_device_stats *net_stats = &adapter->net_stats;
+#endif /* HAVE_NETDEV_STATS_IN_NETDEV */
+ struct ixgbe_hw *hw = &adapter->hw;
+ struct ixgbe_hw_stats *hwstats = &adapter->stats;
+ u64 total_mpc = 0;
+ u32 i, missed_rx = 0, mpc, bprc, lxon, lxoff, xon_off_tot;
+ u64 non_eop_descs = 0, restart_queue = 0, tx_busy = 0;
+ u64 alloc_rx_page_failed = 0, alloc_rx_buff_failed = 0;
+ u64 bytes = 0, packets = 0, hw_csum_rx_error = 0;
+#ifndef IXGBE_NO_LRO
+ u32 flushed = 0, coal = 0;
+ int num_q_vectors = 1;
+#endif
+#ifdef IXGBE_FCOE
+ struct ixgbe_fcoe *fcoe = &adapter->fcoe;
+ unsigned int cpu;
+ u64 fcoe_noddp_counts_sum = 0, fcoe_noddp_ext_buff_counts_sum = 0;
+#endif /* IXGBE_FCOE */
+
+ printk(KERN_DEBUG "ixgbe_update_stats, tx_queues=%d, rx_queues=%d\n",
+ adapter->num_tx_queues, adapter->num_rx_queues);
+
+ if (test_bit(__IXGBE_DOWN, &adapter->state) ||
+ test_bit(__IXGBE_RESETTING, &adapter->state))
+ return;
+
+#ifndef IXGBE_NO_LRO
+ if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED)
+ num_q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
+
+#endif
+ if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) {
+ u64 rsc_count = 0;
+ u64 rsc_flush = 0;
+ for (i = 0; i < adapter->num_rx_queues; i++) {
+ rsc_count += adapter->rx_ring[i]->rx_stats.rsc_count;
+ rsc_flush += adapter->rx_ring[i]->rx_stats.rsc_flush;
+ }
+ adapter->rsc_total_count = rsc_count;
+ adapter->rsc_total_flush = rsc_flush;
+ }
+
+#ifndef IXGBE_NO_LRO
+ for (i = 0; i < num_q_vectors; i++) {
+ struct ixgbe_q_vector *q_vector = adapter->q_vector[i];
+ if (!q_vector)
+ continue;
+ flushed += q_vector->lrolist.stats.flushed;
+ coal += q_vector->lrolist.stats.coal;
+ }
+ adapter->lro_stats.flushed = flushed;
+ adapter->lro_stats.coal = coal;
+
+#endif
+ for (i = 0; i < adapter->num_rx_queues; i++) {
+ struct ixgbe_ring *rx_ring = adapter->rx_ring[i];
+ non_eop_descs += rx_ring->rx_stats.non_eop_descs;
+ alloc_rx_page_failed += rx_ring->rx_stats.alloc_rx_page_failed;
+ alloc_rx_buff_failed += rx_ring->rx_stats.alloc_rx_buff_failed;
+ hw_csum_rx_error += rx_ring->rx_stats.csum_err;
+ bytes += rx_ring->stats.bytes;
+ packets += rx_ring->stats.packets;
+
+ }
+ adapter->non_eop_descs = non_eop_descs;
+ adapter->alloc_rx_page_failed = alloc_rx_page_failed;
+ adapter->alloc_rx_buff_failed = alloc_rx_buff_failed;
+ adapter->hw_csum_rx_error = hw_csum_rx_error;
+ net_stats->rx_bytes = bytes;
+ net_stats->rx_packets = packets;
+
+ bytes = 0;
+ packets = 0;
+ /* gather some stats to the adapter struct that are per queue */
+ for (i = 0; i < adapter->num_tx_queues; i++) {
+ struct ixgbe_ring *tx_ring = adapter->tx_ring[i];
+ restart_queue += tx_ring->tx_stats.restart_queue;
+ tx_busy += tx_ring->tx_stats.tx_busy;
+ bytes += tx_ring->stats.bytes;
+ packets += tx_ring->stats.packets;
+ }
+ adapter->restart_queue = restart_queue;
+ adapter->tx_busy = tx_busy;
+ net_stats->tx_bytes = bytes;
+ net_stats->tx_packets = packets;
+
+ hwstats->crcerrs += IXGBE_READ_REG(hw, IXGBE_CRCERRS);
+
+ /* 8 register reads */
+ for (i = 0; i < 8; i++) {
+ /* for packet buffers not used, the register should read 0 */
+ mpc = IXGBE_READ_REG(hw, IXGBE_MPC(i));
+ missed_rx += mpc;
+ hwstats->mpc[i] += mpc;
+ total_mpc += hwstats->mpc[i];
+ hwstats->pxontxc[i] += IXGBE_READ_REG(hw, IXGBE_PXONTXC(i));
+ hwstats->pxofftxc[i] += IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(i));
+ switch (hw->mac.type) {
+ case ixgbe_mac_82598EB:
+ hwstats->rnbc[i] += IXGBE_READ_REG(hw, IXGBE_RNBC(i));
+ hwstats->qbtc[i] += IXGBE_READ_REG(hw, IXGBE_QBTC(i));
+ hwstats->qbrc[i] += IXGBE_READ_REG(hw, IXGBE_QBRC(i));
+ hwstats->pxonrxc[i] +=
+ IXGBE_READ_REG(hw, IXGBE_PXONRXC(i));
+ break;
+ case ixgbe_mac_82599EB:
+ case ixgbe_mac_X540:
+ hwstats->pxonrxc[i] +=
+ IXGBE_READ_REG(hw, IXGBE_PXONRXCNT(i));
+ break;
+ default:
+ break;
+ }
+ }
+
+ /*16 register reads */
+ for (i = 0; i < 16; i++) {
+ hwstats->qptc[i] += IXGBE_READ_REG(hw, IXGBE_QPTC(i));
+ hwstats->qprc[i] += IXGBE_READ_REG(hw, IXGBE_QPRC(i));
+ if ((hw->mac.type == ixgbe_mac_82599EB) ||
+ (hw->mac.type == ixgbe_mac_X540)) {
+ hwstats->qbtc[i] += IXGBE_READ_REG(hw, IXGBE_QBTC_L(i));
+ IXGBE_READ_REG(hw, IXGBE_QBTC_H(i)); /* to clear */
+ hwstats->qbrc[i] += IXGBE_READ_REG(hw, IXGBE_QBRC_L(i));
+ IXGBE_READ_REG(hw, IXGBE_QBRC_H(i)); /* to clear */
+ }
+ }
+
+ hwstats->gprc += IXGBE_READ_REG(hw, IXGBE_GPRC);
+ /* work around hardware counting issue */
+ hwstats->gprc -= missed_rx;
+
+ ixgbe_update_xoff_received(adapter);
+
+ /* 82598 hardware only has a 32 bit counter in the high register */
+ switch (hw->mac.type) {
+ case ixgbe_mac_82598EB:
+ hwstats->lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXC);
+ hwstats->gorc += IXGBE_READ_REG(hw, IXGBE_GORCH);
+ hwstats->gotc += IXGBE_READ_REG(hw, IXGBE_GOTCH);
+ hwstats->tor += IXGBE_READ_REG(hw, IXGBE_TORH);
+ break;
+ case ixgbe_mac_X540:
+ /* OS2BMC stats are X540 only*/
+ hwstats->o2bgptc += IXGBE_READ_REG(hw, IXGBE_O2BGPTC);
+ hwstats->o2bspc += IXGBE_READ_REG(hw, IXGBE_O2BSPC);
+ hwstats->b2ospc += IXGBE_READ_REG(hw, IXGBE_B2OSPC);
+ hwstats->b2ogprc += IXGBE_READ_REG(hw, IXGBE_B2OGPRC);
+ case ixgbe_mac_82599EB:
+ for (i = 0; i < 16; i++)
+ adapter->hw_rx_no_dma_resources +=
+ IXGBE_READ_REG(hw, IXGBE_QPRDC(i));
+ hwstats->gorc += IXGBE_READ_REG(hw, IXGBE_GORCL);
+ IXGBE_READ_REG(hw, IXGBE_GORCH); /* to clear */
+ hwstats->gotc += IXGBE_READ_REG(hw, IXGBE_GOTCL);
+ IXGBE_READ_REG(hw, IXGBE_GOTCH); /* to clear */
+ hwstats->tor += IXGBE_READ_REG(hw, IXGBE_TORL);
+ IXGBE_READ_REG(hw, IXGBE_TORH); /* to clear */
+ hwstats->lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
+#ifdef HAVE_TX_MQ
+ hwstats->fdirmatch += IXGBE_READ_REG(hw, IXGBE_FDIRMATCH);
+ hwstats->fdirmiss += IXGBE_READ_REG(hw, IXGBE_FDIRMISS);
+#endif /* HAVE_TX_MQ */
+#ifdef IXGBE_FCOE
+ hwstats->fccrc += IXGBE_READ_REG(hw, IXGBE_FCCRC);
+ hwstats->fclast += IXGBE_READ_REG(hw, IXGBE_FCLAST);
+ hwstats->fcoerpdc += IXGBE_READ_REG(hw, IXGBE_FCOERPDC);
+ hwstats->fcoeprc += IXGBE_READ_REG(hw, IXGBE_FCOEPRC);
+ hwstats->fcoeptc += IXGBE_READ_REG(hw, IXGBE_FCOEPTC);
+ hwstats->fcoedwrc += IXGBE_READ_REG(hw, IXGBE_FCOEDWRC);
+ hwstats->fcoedwtc += IXGBE_READ_REG(hw, IXGBE_FCOEDWTC);
+ /* Add up per cpu counters for total ddp aloc fail */
+ if (fcoe && fcoe->pcpu_noddp && fcoe->pcpu_noddp_ext_buff) {
+ for_each_possible_cpu(cpu) {
+ fcoe_noddp_counts_sum +=
+ *per_cpu_ptr(fcoe->pcpu_noddp, cpu);
+ fcoe_noddp_ext_buff_counts_sum +=
+ *per_cpu_ptr(fcoe->
+ pcpu_noddp_ext_buff, cpu);
+ }
+ }
+ hwstats->fcoe_noddp = fcoe_noddp_counts_sum;
+ hwstats->fcoe_noddp_ext_buff = fcoe_noddp_ext_buff_counts_sum;
+
+#endif /* IXGBE_FCOE */
+ break;
+ default:
+ break;
+ }
+ bprc = IXGBE_READ_REG(hw, IXGBE_BPRC);
+ hwstats->bprc += bprc;
+ hwstats->mprc += IXGBE_READ_REG(hw, IXGBE_MPRC);
+ if (hw->mac.type == ixgbe_mac_82598EB)
+ hwstats->mprc -= bprc;
+ hwstats->roc += IXGBE_READ_REG(hw, IXGBE_ROC);
+ hwstats->prc64 += IXGBE_READ_REG(hw, IXGBE_PRC64);
+ hwstats->prc127 += IXGBE_READ_REG(hw, IXGBE_PRC127);
+ hwstats->prc255 += IXGBE_READ_REG(hw, IXGBE_PRC255);
+ hwstats->prc511 += IXGBE_READ_REG(hw, IXGBE_PRC511);
+ hwstats->prc1023 += IXGBE_READ_REG(hw, IXGBE_PRC1023);
+ hwstats->prc1522 += IXGBE_READ_REG(hw, IXGBE_PRC1522);
+ hwstats->rlec += IXGBE_READ_REG(hw, IXGBE_RLEC);
+ lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC);
+ hwstats->lxontxc += lxon;
+ lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC);
+ hwstats->lxofftxc += lxoff;
+ hwstats->gptc += IXGBE_READ_REG(hw, IXGBE_GPTC);
+ hwstats->mptc += IXGBE_READ_REG(hw, IXGBE_MPTC);
+ /*
+ * 82598 errata - tx of flow control packets is included in tx counters
+ */
+ xon_off_tot = lxon + lxoff;
+ hwstats->gptc -= xon_off_tot;
+ hwstats->mptc -= xon_off_tot;
+ hwstats->gotc -= (xon_off_tot * (ETH_ZLEN + ETH_FCS_LEN));
+ hwstats->ruc += IXGBE_READ_REG(hw, IXGBE_RUC);
+ hwstats->rfc += IXGBE_READ_REG(hw, IXGBE_RFC);
+ hwstats->rjc += IXGBE_READ_REG(hw, IXGBE_RJC);
+ hwstats->tpr += IXGBE_READ_REG(hw, IXGBE_TPR);
+ hwstats->ptc64 += IXGBE_READ_REG(hw, IXGBE_PTC64);
+ hwstats->ptc64 -= xon_off_tot;
+ hwstats->ptc127 += IXGBE_READ_REG(hw, IXGBE_PTC127);
+ hwstats->ptc255 += IXGBE_READ_REG(hw, IXGBE_PTC255);
+ hwstats->ptc511 += IXGBE_READ_REG(hw, IXGBE_PTC511);
+ hwstats->ptc1023 += IXGBE_READ_REG(hw, IXGBE_PTC1023);
+ hwstats->ptc1522 += IXGBE_READ_REG(hw, IXGBE_PTC1522);
+ hwstats->bptc += IXGBE_READ_REG(hw, IXGBE_BPTC);
+ /* Fill out the OS statistics structure */
+ net_stats->multicast = hwstats->mprc;
+
+ /* Rx Errors */
+ net_stats->rx_errors = hwstats->crcerrs +
+ hwstats->rlec;
+ net_stats->rx_dropped = 0;
+ net_stats->rx_length_errors = hwstats->rlec;
+ net_stats->rx_crc_errors = hwstats->crcerrs;
+ net_stats->rx_missed_errors = total_mpc;
+
+ /*
+ * VF Stats Collection - skip while resetting because these
+ * are not clear on read and otherwise you'll sometimes get
+ * crazy values.
+ */
+ if (!test_bit(__IXGBE_RESETTING, &adapter->state)) {
+ for (i = 0; i < adapter->num_vfs; i++) {
+ UPDATE_VF_COUNTER_32bit(IXGBE_PVFGPRC(i), \
+ adapter->vfinfo[i].last_vfstats.gprc, \
+ adapter->vfinfo[i].vfstats.gprc);
+ UPDATE_VF_COUNTER_32bit(IXGBE_PVFGPTC(i), \
+ adapter->vfinfo[i].last_vfstats.gptc, \
+ adapter->vfinfo[i].vfstats.gptc);
+ UPDATE_VF_COUNTER_36bit(IXGBE_PVFGORC_LSB(i), \
+ IXGBE_PVFGORC_MSB(i), \
+ adapter->vfinfo[i].last_vfstats.gorc, \
+ adapter->vfinfo[i].vfstats.gorc);
+ UPDATE_VF_COUNTER_36bit(IXGBE_PVFGOTC_LSB(i), \
+ IXGBE_PVFGOTC_MSB(i), \
+ adapter->vfinfo[i].last_vfstats.gotc, \
+ adapter->vfinfo[i].vfstats.gotc);
+ UPDATE_VF_COUNTER_32bit(IXGBE_PVFMPRC(i), \
+ adapter->vfinfo[i].last_vfstats.mprc, \
+ adapter->vfinfo[i].vfstats.mprc);
+ }
+ }
+}
+
+
+#ifdef NO_VNIC
+
+/**
+ * ixgbe_watchdog_update_link - update the link status
+ * @adapter - pointer to the device adapter structure
+ * @link_speed - pointer to a u32 to store the link_speed
+ **/
+static void ixgbe_watchdog_update_link(struct ixgbe_adapter *adapter)
+{
+ struct ixgbe_hw *hw = &adapter->hw;
+ u32 link_speed = adapter->link_speed;
+ bool link_up = adapter->link_up;
+ bool pfc_en = adapter->dcb_cfg.pfc_mode_enable;
+
+ if (!(adapter->flags & IXGBE_FLAG_NEED_LINK_UPDATE))
+ return;
+
+ if (hw->mac.ops.check_link) {
+ hw->mac.ops.check_link(hw, &link_speed, &link_up, false);
+ } else {
+ /* always assume link is up, if no check link function */
+ link_speed = IXGBE_LINK_SPEED_10GB_FULL;
+ link_up = true;
+ }
+
+#ifdef HAVE_DCBNL_IEEE
+ if (adapter->ixgbe_ieee_pfc)
+ pfc_en |= !!(adapter->ixgbe_ieee_pfc->pfc_en);
+
+#endif
+ if (link_up && !((adapter->flags & IXGBE_FLAG_DCB_ENABLED) && pfc_en)) {
+ hw->mac.ops.fc_enable(hw);
+ //ixgbe_set_rx_drop_en(adapter);
+ }
+
+ if (link_up ||
+ time_after(jiffies, (adapter->link_check_timeout +
+ IXGBE_TRY_LINK_TIMEOUT))) {
+ adapter->flags &= ~IXGBE_FLAG_NEED_LINK_UPDATE;
+ IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EIMC_LSC);
+ IXGBE_WRITE_FLUSH(hw);
+ }
+
+ adapter->link_up = link_up;
+ adapter->link_speed = link_speed;
+}
+#endif
+
+
+
+#ifdef NO_VNIC
+
+/**
+ * ixgbe_service_task - manages and runs subtasks
+ * @work: pointer to work_struct containing our data
+ **/
+static void ixgbe_service_task(struct work_struct *work)
+{
+ //struct ixgbe_adapter *adapter = container_of(work,
+ // struct ixgbe_adapter,
+ // service_task);
+
+ //ixgbe_reset_subtask(adapter);
+ //ixgbe_sfp_detection_subtask(adapter);
+ //ixgbe_sfp_link_config_subtask(adapter);
+ //ixgbe_check_overtemp_subtask(adapter);
+ //ixgbe_watchdog_subtask(adapter);
+#ifdef HAVE_TX_MQ
+ //ixgbe_fdir_reinit_subtask(adapter);
+#endif
+ //ixgbe_check_hang_subtask(adapter);
+
+ //ixgbe_service_event_complete(adapter);
+}
+
+
+
+
+#define IXGBE_TXD_CMD (IXGBE_TXD_CMD_EOP | \
+ IXGBE_TXD_CMD_RS)
+
+
+/**
+ * ixgbe_set_mac - Change the Ethernet Address of the NIC
+ * @netdev: network interface device structure
+ * @p: pointer to an address structure
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int ixgbe_set_mac(struct net_device *netdev, void *p)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_hw *hw = &adapter->hw;
+ struct sockaddr *addr = p;
+ int ret;
+
+ if (!is_valid_ether_addr(addr->sa_data))
+ return -EADDRNOTAVAIL;
+
+ ixgbe_del_mac_filter(adapter, hw->mac.addr,
+ adapter->num_vfs);
+ memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
+ memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
+
+
+ /* set the correct pool for the new PF MAC address in entry 0 */
+ ret = ixgbe_add_mac_filter(adapter, hw->mac.addr,
+ adapter->num_vfs);
+ return ret > 0 ? 0 : ret;
+}
+
+
+/**
+ * ixgbe_ioctl -
+ * @netdev:
+ * @ifreq:
+ * @cmd:
+ **/
+static int ixgbe_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
+{
+ switch (cmd) {
+#ifdef ETHTOOL_OPS_COMPAT
+ case SIOCETHTOOL:
+ return ethtool_ioctl(ifr);
+#endif
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+#endif /* NO_VNIC */
+
+
+void ixgbe_do_reset(struct net_device *netdev)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+ if (netif_running(netdev))
+ ixgbe_reinit_locked(adapter);
+ else
+ ixgbe_reset(adapter);
+}
+
+
+
+
+
+
+/**
+ * ixgbe_probe - Device Initialization Routine
+ * @pdev: PCI device information struct
+ * @ent: entry in ixgbe_pci_tbl
+ *
+ * Returns 0 on success, negative on failure
+ *
+ * ixgbe_probe initializes an adapter identified by a pci_dev structure.
+ * The OS initialization, configuring of the adapter private structure,
+ * and a hardware reset occur.
+ **/
+//static
+int ixgbe_kni_probe(struct pci_dev *pdev,
+ struct net_device **lad_dev)
+{
+ size_t count;
+ struct net_device *netdev;
+ struct ixgbe_adapter *adapter = NULL;
+ struct ixgbe_hw *hw = NULL;
+ static int cards_found;
+ int i, err;
+ u16 offset;
+ u16 eeprom_verh, eeprom_verl, eeprom_cfg_blkh, eeprom_cfg_blkl;
+ u32 etrack_id;
+ u16 build, major, patch;
+ char *info_string, *i_s_var;
+ u8 part_str[IXGBE_PBANUM_LENGTH];
+ enum ixgbe_mac_type mac_type = ixgbe_mac_unknown;
+#ifdef HAVE_TX_MQ
+ unsigned int indices = num_possible_cpus();
+#endif /* HAVE_TX_MQ */
+#ifdef IXGBE_FCOE
+ u16 device_caps;
+#endif
+ u16 wol_cap;
+
+ err = pci_enable_device_mem(pdev);
+ if (err)
+ return err;
+
+
+#ifdef NO_VNIC
+ err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
+ IORESOURCE_MEM), ixgbe_driver_name);
+ if (err) {
+ dev_err(pci_dev_to_dev(pdev),
+ "pci_request_selected_regions failed 0x%x\n", err);
+ goto err_pci_reg;
+ }
+#endif
+
+ /*
+ * The mac_type is needed before we have the adapter is set up
+ * so rather than maintain two devID -> MAC tables we dummy up
+ * an ixgbe_hw stuct and use ixgbe_set_mac_type.
+ */
+ hw = vmalloc(sizeof(struct ixgbe_hw));
+ if (!hw) {
+ pr_info("Unable to allocate memory for early mac "
+ "check\n");
+ } else {
+ hw->vendor_id = pdev->vendor;
+ hw->device_id = pdev->device;
+ ixgbe_set_mac_type(hw);
+ mac_type = hw->mac.type;
+ vfree(hw);
+ }
+
+#ifdef NO_VNIC
+ /*
+ * Workaround of Silicon errata on 82598. Disable LOs in the PCI switch
+ * port to which the 82598 is connected to prevent duplicate
+ * completions caused by LOs. We need the mac type so that we only
+ * do this on 82598 devices, ixgbe_set_mac_type does this for us if
+ * we set it's device ID.
+ */
+ if (mac_type == ixgbe_mac_82598EB)
+ pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S);
+
+ pci_enable_pcie_error_reporting(pdev);
+
+ pci_set_master(pdev);
+#endif
+
+#ifdef HAVE_TX_MQ
+#ifdef CONFIG_DCB
+#ifdef HAVE_MQPRIO
+ indices *= IXGBE_DCB_MAX_TRAFFIC_CLASS;
+#else
+ indices = max_t(unsigned int, indices, IXGBE_MAX_DCB_INDICES);
+#endif /* HAVE_MQPRIO */
+#endif /* CONFIG_DCB */
+
+ if (mac_type == ixgbe_mac_82598EB)
+ indices = min_t(unsigned int, indices, IXGBE_MAX_RSS_INDICES);
+ else
+ indices = min_t(unsigned int, indices, IXGBE_MAX_FDIR_INDICES);
+
+#ifdef IXGBE_FCOE
+ indices += min_t(unsigned int, num_possible_cpus(),
+ IXGBE_MAX_FCOE_INDICES);
+#endif
+ netdev = alloc_etherdev_mq(sizeof(struct ixgbe_adapter), indices);
+#else /* HAVE_TX_MQ */
+ netdev = alloc_etherdev(sizeof(struct ixgbe_adapter));
+#endif /* HAVE_TX_MQ */
+ if (!netdev) {
+ err = -ENOMEM;
+ goto err_alloc_etherdev;
+ }
+
+ SET_NETDEV_DEV(netdev, &pdev->dev);
+
+ adapter = netdev_priv(netdev);
+ //pci_set_drvdata(pdev, adapter);
+
+ adapter->netdev = netdev;
+ adapter->pdev = pdev;
+ hw = &adapter->hw;
+ hw->back = adapter;
+ adapter->msg_enable = (1 << DEFAULT_DEBUG_LEVEL_SHIFT) - 1;
+
+#ifdef HAVE_PCI_ERS
+ /*
+ * call save state here in standalone driver because it relies on
+ * adapter struct to exist, and needs to call netdev_priv
+ */
+ pci_save_state(pdev);
+
+#endif
+ hw->hw_addr = ioremap(pci_resource_start(pdev, 0),
+ pci_resource_len(pdev, 0));
+ if (!hw->hw_addr) {
+ err = -EIO;
+ goto err_ioremap;
+ }
+ //ixgbe_assign_netdev_ops(netdev);
+ ixgbe_set_ethtool_ops(netdev);
+
+ strlcpy(netdev->name, pci_name(pdev), sizeof(netdev->name));
+
+ adapter->bd_number = cards_found;
+
+ /* setup the private structure */
+ err = ixgbe_sw_init(adapter);
+ if (err)
+ goto err_sw_init;
+
+ /* Make it possible the adapter to be woken up via WOL */
+ switch (adapter->hw.mac.type) {
+ case ixgbe_mac_82599EB:
+ case ixgbe_mac_X540:
+ IXGBE_WRITE_REG(&adapter->hw, IXGBE_WUS, ~0);
+ break;
+ default:
+ break;
+ }
+
+ /*
+ * check_options must be called before setup_link to set up
+ * hw->fc completely
+ */
+ //ixgbe_check_options(adapter);
+
+#ifndef NO_VNIC
+ /* reset_hw fills in the perm_addr as well */
+ hw->phy.reset_if_overtemp = true;
+ err = hw->mac.ops.reset_hw(hw);
+ hw->phy.reset_if_overtemp = false;
+ if (err == IXGBE_ERR_SFP_NOT_PRESENT &&
+ hw->mac.type == ixgbe_mac_82598EB) {
+ err = 0;
+ } else if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
+ e_dev_err("failed to load because an unsupported SFP+ "
+ "module type was detected.\n");
+ e_dev_err("Reload the driver after installing a supported "
+ "module.\n");
+ goto err_sw_init;
+ } else if (err) {
+ e_dev_err("HW Init failed: %d\n", err);
+ goto err_sw_init;
+ }
+#endif
+
+ //if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)
+ // ixgbe_probe_vf(adapter);
+
+
+#ifdef MAX_SKB_FRAGS
+ netdev->features |= NETIF_F_SG |
+ NETIF_F_IP_CSUM;
+
+#ifdef NETIF_F_IPV6_CSUM
+ netdev->features |= NETIF_F_IPV6_CSUM;
+#endif
+
+#ifdef NETIF_F_HW_VLAN_TX
+ netdev->features |= NETIF_F_HW_VLAN_TX |
+ NETIF_F_HW_VLAN_RX;
+#endif
+#ifdef NETIF_F_TSO
+ netdev->features |= NETIF_F_TSO;
+#endif /* NETIF_F_TSO */
+#ifdef NETIF_F_TSO6
+ netdev->features |= NETIF_F_TSO6;
+#endif /* NETIF_F_TSO6 */
+#ifdef NETIF_F_RXHASH
+ netdev->features |= NETIF_F_RXHASH;
+#endif /* NETIF_F_RXHASH */
+
+#ifdef HAVE_NDO_SET_FEATURES
+ netdev->features |= NETIF_F_RXCSUM;
+
+ /* copy netdev features into list of user selectable features */
+ netdev->hw_features |= netdev->features;
+
+ /* give us the option of enabling RSC/LRO later */
+#ifdef IXGBE_NO_LRO
+ if (adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE)
+#endif
+ netdev->hw_features |= NETIF_F_LRO;
+
+#else
+#ifdef NETIF_F_GRO
+
+ /* this is only needed on kernels prior to 2.6.39 */
+ netdev->features |= NETIF_F_GRO;
+#endif /* NETIF_F_GRO */
+#endif
+
+#ifdef NETIF_F_HW_VLAN_TX
+ /* set this bit last since it cannot be part of hw_features */
+ netdev->features |= NETIF_F_HW_VLAN_FILTER;
+#endif
+ switch (adapter->hw.mac.type) {
+ case ixgbe_mac_82599EB:
+ case ixgbe_mac_X540:
+ netdev->features |= NETIF_F_SCTP_CSUM;
+#ifdef HAVE_NDO_SET_FEATURES
+ netdev->hw_features |= NETIF_F_SCTP_CSUM |
+ NETIF_F_NTUPLE;
+#endif
+ break;
+ default:
+ break;
+ }
+
+#ifdef HAVE_NETDEV_VLAN_FEATURES
+ netdev->vlan_features |= NETIF_F_SG |
+ NETIF_F_IP_CSUM |
+ NETIF_F_IPV6_CSUM |
+ NETIF_F_TSO |
+ NETIF_F_TSO6;
+
+#endif /* HAVE_NETDEV_VLAN_FEATURES */
+ /*
+ * If perfect filters were enabled in check_options(), enable them
+ * on the netdevice too.
+ */
+ if (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE)
+ netdev->features |= NETIF_F_NTUPLE;
+ if (adapter->flags & IXGBE_FLAG_VMDQ_ENABLED)
+ adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED;
+ if (adapter->flags & IXGBE_FLAG_DCB_ENABLED)
+ adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED;
+ if (adapter->flags & IXGBE_FLAG_VMDQ_ENABLED) {
+ adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;
+ /* clear n-tuple support in the netdev unconditionally */
+ netdev->features &= ~NETIF_F_NTUPLE;
+ }
+
+#ifdef NETIF_F_RXHASH
+ if (!(adapter->flags & IXGBE_FLAG_RSS_ENABLED))
+ netdev->features &= ~NETIF_F_RXHASH;
+
+#endif /* NETIF_F_RXHASH */
+ if (netdev->features & NETIF_F_LRO) {
+ if ((adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE) &&
+ ((adapter->rx_itr_setting == 1) ||
+ (adapter->rx_itr_setting > IXGBE_MIN_RSC_ITR))) {
+ adapter->flags2 |= IXGBE_FLAG2_RSC_ENABLED;
+ } else if (adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE) {
+#ifdef IXGBE_NO_LRO
+ e_info(probe, "InterruptThrottleRate set too high, "
+ "disabling RSC\n");
+#else
+ e_info(probe, "InterruptThrottleRate set too high, "
+ "falling back to software LRO\n");
+#endif
+ }
+ }
+#ifdef CONFIG_DCB
+ //netdev->dcbnl_ops = &dcbnl_ops;
+#endif
+
+#ifdef IXGBE_FCOE
+#ifdef NETIF_F_FSO
+ if (adapter->flags & IXGBE_FLAG_FCOE_CAPABLE) {
+ ixgbe_get_device_caps(hw, &device_caps);
+ if (device_caps & IXGBE_DEVICE_CAPS_FCOE_OFFLOADS) {
+ adapter->flags &= ~IXGBE_FLAG_FCOE_ENABLED;
+ adapter->flags &= ~IXGBE_FLAG_FCOE_CAPABLE;
+ e_info(probe, "FCoE offload feature is not available. "
+ "Disabling FCoE offload feature\n");
+ }
+#ifndef HAVE_NETDEV_OPS_FCOE_ENABLE
+ else {
+ adapter->flags |= IXGBE_FLAG_FCOE_ENABLED;
+ adapter->ring_feature[RING_F_FCOE].indices =
+ IXGBE_FCRETA_SIZE;
+ netdev->features |= NETIF_F_FSO |
+ NETIF_F_FCOE_CRC |
+ NETIF_F_FCOE_MTU;
+ netdev->fcoe_ddp_xid = IXGBE_FCOE_DDP_MAX - 1;
+ }
+#endif /* HAVE_NETDEV_OPS_FCOE_ENABLE */
+#ifdef HAVE_NETDEV_VLAN_FEATURES
+ netdev->vlan_features |= NETIF_F_FSO |
+ NETIF_F_FCOE_CRC |
+ NETIF_F_FCOE_MTU;
+#endif /* HAVE_NETDEV_VLAN_FEATURES */
+ }
+#endif /* NETIF_F_FSO */
+#endif /* IXGBE_FCOE */
+
+#endif /* MAX_SKB_FRAGS */
+ /* make sure the EEPROM is good */
+ if (hw->eeprom.ops.validate_checksum &&
+ (hw->eeprom.ops.validate_checksum(hw, NULL) < 0)) {
+ e_dev_err("The EEPROM Checksum Is Not Valid\n");
+ err = -EIO;
+ goto err_sw_init;
+ }
+
+ memcpy(netdev->dev_addr, hw->mac.perm_addr, netdev->addr_len);
+#ifdef ETHTOOL_GPERMADDR
+ memcpy(netdev->perm_addr, hw->mac.perm_addr, netdev->addr_len);
+
+ if (ixgbe_validate_mac_addr(netdev->perm_addr)) {
+ e_dev_err("invalid MAC address\n");
+ err = -EIO;
+ goto err_sw_init;
+ }
+#else
+ if (ixgbe_validate_mac_addr(netdev->dev_addr)) {
+ e_dev_err("invalid MAC address\n");
+ err = -EIO;
+ goto err_sw_init;
+ }
+#endif
+ memcpy(&adapter->mac_table[0].addr, hw->mac.perm_addr,
+ netdev->addr_len);
+ adapter->mac_table[0].queue = adapter->num_vfs;
+ adapter->mac_table[0].state = (IXGBE_MAC_STATE_DEFAULT |
+ IXGBE_MAC_STATE_IN_USE);
+ hw->mac.ops.set_rar(hw, 0, adapter->mac_table[0].addr,
+ adapter->mac_table[0].queue,
+ IXGBE_RAH_AV);
+
+ //setup_timer(&adapter->service_timer, &ixgbe_service_timer,
+ // (unsigned long) adapter);
+
+ //INIT_WORK(&adapter->service_task, ixgbe_service_task);
+ //clear_bit(__IXGBE_SERVICE_SCHED, &adapter->state);
+
+ //err = ixgbe_init_interrupt_scheme(adapter);
+ //if (err)
+ // goto err_sw_init;
+
+ //adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED;
+ ixgbe_set_num_queues(adapter);
+
+ adapter->wol = 0;
+ /* WOL not supported for all but the following */
+ switch (pdev->device) {
+ case IXGBE_DEV_ID_82599_SFP:
+ /* Only these subdevice supports WOL */
+ switch (pdev->subsystem_device) {
+ case IXGBE_SUBDEV_ID_82599_560FLR:
+ /* only support first port */
+ if (hw->bus.func != 0)
+ break;
+ case IXGBE_SUBDEV_ID_82599_SFP:
+ adapter->wol = IXGBE_WUFC_MAG;
+ break;
+ }
+ break;
+ case IXGBE_DEV_ID_82599_COMBO_BACKPLANE:
+ /* All except this subdevice support WOL */
+ if (pdev->subsystem_device != IXGBE_SUBDEV_ID_82599_KX4_KR_MEZZ)
+ adapter->wol = IXGBE_WUFC_MAG;
+ break;
+ case IXGBE_DEV_ID_82599_KX4:
+ adapter->wol = IXGBE_WUFC_MAG;
+ break;
+ case IXGBE_DEV_ID_X540T:
+ /* Check eeprom to see if it is enabled */
+ ixgbe_read_eeprom(hw, 0x2c, &adapter->eeprom_cap);
+ wol_cap = adapter->eeprom_cap & IXGBE_DEVICE_CAPS_WOL_MASK;
+
+ if ((wol_cap == IXGBE_DEVICE_CAPS_WOL_PORT0_1) ||
+ ((wol_cap == IXGBE_DEVICE_CAPS_WOL_PORT0) &&
+ (hw->bus.func == 0)))
+ adapter->wol = IXGBE_WUFC_MAG;
+ break;
+ }
+ //device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
+
+
+ /*
+ * Save off EEPROM version number and Option Rom version which
+ * together make a unique identify for the eeprom
+ */
+ ixgbe_read_eeprom(hw, 0x2e, &eeprom_verh);
+ ixgbe_read_eeprom(hw, 0x2d, &eeprom_verl);
+
+ etrack_id = (eeprom_verh << 16) | eeprom_verl;
+
+ ixgbe_read_eeprom(hw, 0x17, &offset);
+
+ /* Make sure offset to SCSI block is valid */
+ if (!(offset == 0x0) && !(offset == 0xffff)) {
+ ixgbe_read_eeprom(hw, offset + 0x84, &eeprom_cfg_blkh);
+ ixgbe_read_eeprom(hw, offset + 0x83, &eeprom_cfg_blkl);
+
+ /* Only display Option Rom if exist */
+ if (eeprom_cfg_blkl && eeprom_cfg_blkh) {
+ major = eeprom_cfg_blkl >> 8;
+ build = (eeprom_cfg_blkl << 8) | (eeprom_cfg_blkh >> 8);
+ patch = eeprom_cfg_blkh & 0x00ff;
+
+ snprintf(adapter->eeprom_id, sizeof(adapter->eeprom_id),
+ "0x%08x, %d.%d.%d", etrack_id, major, build,
+ patch);
+ } else {
+ snprintf(adapter->eeprom_id, sizeof(adapter->eeprom_id),
+ "0x%08x", etrack_id);
+ }
+ } else {
+ snprintf(adapter->eeprom_id, sizeof(adapter->eeprom_id),
+ "0x%08x", etrack_id);
+ }
+
+ /* reset the hardware with the new settings */
+ err = hw->mac.ops.start_hw(hw);
+ if (err == IXGBE_ERR_EEPROM_VERSION) {
+ /* We are running on a pre-production device, log a warning */
+ e_dev_warn("This device is a pre-production adapter/LOM. "
+ "Please be aware there may be issues associated "
+ "with your hardware. If you are experiencing "
+ "problems please contact your Intel or hardware "
+ "representative who provided you with this "
+ "hardware.\n");
+ }
+ /* pick up the PCI bus settings for reporting later */
+ if (hw->mac.ops.get_bus_info)
+ hw->mac.ops.get_bus_info(hw);
+
+ strlcpy(netdev->name, "eth%d", sizeof(netdev->name));
+ *lad_dev = netdev;
+
+ adapter->netdev_registered = true;
+#ifdef NO_VNIC
+ /* power down the optics */
+ if ((hw->phy.multispeed_fiber) ||
+ ((hw->mac.ops.get_media_type(hw) == ixgbe_media_type_fiber) &&
+ (hw->mac.type == ixgbe_mac_82599EB)))
+ ixgbe_disable_tx_laser(hw);
+
+ /* carrier off reporting is important to ethtool even BEFORE open */
+ netif_carrier_off(netdev);
+ /* keep stopping all the transmit queues for older kernels */
+ netif_tx_stop_all_queues(netdev);
+#endif
+
+ /* print all messages at the end so that we use our eth%d name */
+ /* print bus type/speed/width info */
+ e_dev_info("(PCI Express:%s:%s) ",
+ (hw->bus.speed == ixgbe_bus_speed_5000 ? "5.0GT/s" :
+ hw->bus.speed == ixgbe_bus_speed_2500 ? "2.5GT/s" :
+ "Unknown"),
+ (hw->bus.width == ixgbe_bus_width_pcie_x8 ? "Width x8" :
+ hw->bus.width == ixgbe_bus_width_pcie_x4 ? "Width x4" :
+ hw->bus.width == ixgbe_bus_width_pcie_x1 ? "Width x1" :
+ "Unknown"));
+
+ /* print the MAC address */
+ for (i = 0; i < 6; i++)
+ pr_cont("%2.2x%c", netdev->dev_addr[i], i == 5 ? '\n' : ':');
+
+ /* First try to read PBA as a string */
+ err = ixgbe_read_pba_string(hw, part_str, IXGBE_PBANUM_LENGTH);
+ if (err)
+ strlcpy(part_str, "Unknown", sizeof(part_str));
+ if (ixgbe_is_sfp(hw) && hw->phy.sfp_type != ixgbe_sfp_type_not_present)
+ e_info(probe, "MAC: %d, PHY: %d, SFP+: %d, PBA No: %s\n",
+ hw->mac.type, hw->phy.type, hw->phy.sfp_type, part_str);
+ else
+ e_info(probe, "MAC: %d, PHY: %d, PBA No: %s\n",
+ hw->mac.type, hw->phy.type, part_str);
+
+ if (((hw->bus.speed == ixgbe_bus_speed_2500) &&
+ (hw->bus.width <= ixgbe_bus_width_pcie_x4)) ||
+ (hw->bus.width <= ixgbe_bus_width_pcie_x2)) {
+ e_dev_warn("PCI-Express bandwidth available for this "
+ "card is not sufficient for optimal "
+ "performance.\n");
+ e_dev_warn("For optimal performance a x8 PCI-Express "
+ "slot is required.\n");
+ }
+
+#define INFO_STRING_LEN 255
+ info_string = kzalloc(INFO_STRING_LEN, GFP_KERNEL);
+ if (!info_string) {
+ e_err(probe, "allocation for info string failed\n");
+ goto no_info_string;
+ }
+ count = 0;
+ i_s_var = info_string;
+ count += snprintf(i_s_var, INFO_STRING_LEN, "Enabled Features: ");
+
+ i_s_var = info_string + count;
+ count += snprintf(i_s_var, (INFO_STRING_LEN - count),
+ "RxQ: %d TxQ: %d ", adapter->num_rx_queues,
+ adapter->num_tx_queues);
+ i_s_var = info_string + count;
+#ifdef IXGBE_FCOE
+ if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) {
+ count += snprintf(i_s_var, INFO_STRING_LEN - count, "FCoE ");
+ i_s_var = info_string + count;
+ }
+#endif
+ if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) {
+ count += snprintf(i_s_var, INFO_STRING_LEN - count,
+ "FdirHash ");
+ i_s_var = info_string + count;
+ }
+ if (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE) {
+ count += snprintf(i_s_var, INFO_STRING_LEN - count,
+ "FdirPerfect ");
+ i_s_var = info_string + count;
+ }
+ if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) {
+ count += snprintf(i_s_var, INFO_STRING_LEN - count, "DCB ");
+ i_s_var = info_string + count;
+ }
+ if (adapter->flags & IXGBE_FLAG_RSS_ENABLED) {
+ count += snprintf(i_s_var, INFO_STRING_LEN - count, "RSS ");
+ i_s_var = info_string + count;
+ }
+ if (adapter->flags & IXGBE_FLAG_DCA_ENABLED) {
+ count += snprintf(i_s_var, INFO_STRING_LEN - count, "DCA ");
+ i_s_var = info_string + count;
+ }
+ if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) {
+ count += snprintf(i_s_var, INFO_STRING_LEN - count, "RSC ");
+ i_s_var = info_string + count;
+ }
+#ifndef IXGBE_NO_LRO
+ else if (netdev->features & NETIF_F_LRO) {
+ count += snprintf(i_s_var, INFO_STRING_LEN - count, "LRO ");
+ i_s_var = info_string + count;
+ }
+#endif
+
+ BUG_ON(i_s_var > (info_string + INFO_STRING_LEN));
+ /* end features printing */
+ e_info(probe, "%s\n", info_string);
+ kfree(info_string);
+no_info_string:
+
+ /* firmware requires blank driver version */
+ ixgbe_set_fw_drv_ver(hw, 0xFF, 0xFF, 0xFF, 0xFF);
+
+#if defined(HAVE_NETDEV_STORAGE_ADDRESS) && defined(NETDEV_HW_ADDR_T_SAN)
+ /* add san mac addr to netdev */
+ //ixgbe_add_sanmac_netdev(netdev);
+
+#endif /* (HAVE_NETDEV_STORAGE_ADDRESS) && (NETDEV_HW_ADDR_T_SAN) */
+ e_info(probe, "Intel(R) 10 Gigabit Network Connection\n");
+ cards_found++;
+
+#ifdef IXGBE_SYSFS
+ //if (ixgbe_sysfs_init(adapter))
+ // e_err(probe, "failed to allocate sysfs resources\n");
+#else
+#ifdef IXGBE_PROCFS
+ //if (ixgbe_procfs_init(adapter))
+ // e_err(probe, "failed to allocate procfs resources\n");
+#endif /* IXGBE_PROCFS */
+#endif /* IXGBE_SYSFS */
+
+ return 0;
+
+//err_register:
+ //ixgbe_clear_interrupt_scheme(adapter);
+ //ixgbe_release_hw_control(adapter);
+err_sw_init:
+ adapter->flags2 &= ~IXGBE_FLAG2_SEARCH_FOR_SFP;
+ if (adapter->mac_table)
+ kfree(adapter->mac_table);
+ iounmap(hw->hw_addr);
+err_ioremap:
+ free_netdev(netdev);
+err_alloc_etherdev:
+ //pci_release_selected_regions(pdev,
+ // pci_select_bars(pdev, IORESOURCE_MEM));
+//err_pci_reg:
+//err_dma:
+ pci_disable_device(pdev);
+ return err;
+}
+
+/**
+ * ixgbe_remove - Device Removal Routine
+ * @pdev: PCI device information struct
+ *
+ * ixgbe_remove is called by the PCI subsystem to alert the driver
+ * that it should release a PCI device. The could be caused by a
+ * Hot-Plug event, or because the driver is going to be removed from
+ * memory.
+ **/
+void ixgbe_kni_remove(struct pci_dev *pdev)
+{
+ pci_disable_device(pdev);
+}
+
+
+u16 ixgbe_read_pci_cfg_word(struct ixgbe_hw *hw, u32 reg)
+{
+ u16 value;
+ struct ixgbe_adapter *adapter = hw->back;
+
+ pci_read_config_word(adapter->pdev, reg, &value);
+ return value;
+}
+
+void ixgbe_write_pci_cfg_word(struct ixgbe_hw *hw, u32 reg, u16 value)
+{
+ struct ixgbe_adapter *adapter = hw->back;
+
+ pci_write_config_word(adapter->pdev, reg, value);
+}
+
+void ewarn(struct ixgbe_hw *hw, const char *st, u32 status)
+{
+ struct ixgbe_adapter *adapter = hw->back;
+
+ netif_warn(adapter, drv, adapter->netdev, "%s", st);
+}
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_mbx.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_mbx.h
new file mode 100644
index 00000000..124f00de
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_mbx.h
@@ -0,0 +1,105 @@
+/*******************************************************************************
+
+ Intel 10 Gigabit PCI Express Linux driver
+ Copyright(c) 1999 - 2012 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _IXGBE_MBX_H_
+#define _IXGBE_MBX_H_
+
+#include "ixgbe_type.h"
+
+#define IXGBE_VFMAILBOX_SIZE 16 /* 16 32 bit words - 64 bytes */
+#define IXGBE_ERR_MBX -100
+
+#define IXGBE_VFMAILBOX 0x002FC
+#define IXGBE_VFMBMEM 0x00200
+
+/* Define mailbox register bits */
+#define IXGBE_VFMAILBOX_REQ 0x00000001 /* Request for PF Ready bit */
+#define IXGBE_VFMAILBOX_ACK 0x00000002 /* Ack PF message received */
+#define IXGBE_VFMAILBOX_VFU 0x00000004 /* VF owns the mailbox buffer */
+#define IXGBE_VFMAILBOX_PFU 0x00000008 /* PF owns the mailbox buffer */
+#define IXGBE_VFMAILBOX_PFSTS 0x00000010 /* PF wrote a message in the MB */
+#define IXGBE_VFMAILBOX_PFACK 0x00000020 /* PF ack the previous VF msg */
+#define IXGBE_VFMAILBOX_RSTI 0x00000040 /* PF has reset indication */
+#define IXGBE_VFMAILBOX_RSTD 0x00000080 /* PF has indicated reset done */
+#define IXGBE_VFMAILBOX_R2C_BITS 0x000000B0 /* All read to clear bits */
+
+#define IXGBE_PFMAILBOX_STS 0x00000001 /* Initiate message send to VF */
+#define IXGBE_PFMAILBOX_ACK 0x00000002 /* Ack message recv'd from VF */
+#define IXGBE_PFMAILBOX_VFU 0x00000004 /* VF owns the mailbox buffer */
+#define IXGBE_PFMAILBOX_PFU 0x00000008 /* PF owns the mailbox buffer */
+#define IXGBE_PFMAILBOX_RVFU 0x00000010 /* Reset VFU - used when VF stuck */
+
+#define IXGBE_MBVFICR_VFREQ_MASK 0x0000FFFF /* bits for VF messages */
+#define IXGBE_MBVFICR_VFREQ_VF1 0x00000001 /* bit for VF 1 message */
+#define IXGBE_MBVFICR_VFACK_MASK 0xFFFF0000 /* bits for VF acks */
+#define IXGBE_MBVFICR_VFACK_VF1 0x00010000 /* bit for VF 1 ack */
+
+
+/* If it's a IXGBE_VF_* msg then it originates in the VF and is sent to the
+ * PF. The reverse is true if it is IXGBE_PF_*.
+ * Message ACK's are the value or'd with 0xF0000000
+ */
+#define IXGBE_VT_MSGTYPE_ACK 0x80000000 /* Messages below or'd with
+ * this are the ACK */
+#define IXGBE_VT_MSGTYPE_NACK 0x40000000 /* Messages below or'd with
+ * this are the NACK */
+#define IXGBE_VT_MSGTYPE_CTS 0x20000000 /* Indicates that VF is still
+ * clear to send requests */
+#define IXGBE_VT_MSGINFO_SHIFT 16
+/* bits 23:16 are used for extra info for certain messages */
+#define IXGBE_VT_MSGINFO_MASK (0xFF << IXGBE_VT_MSGINFO_SHIFT)
+
+#define IXGBE_VF_RESET 0x01 /* VF requests reset */
+#define IXGBE_VF_SET_MAC_ADDR 0x02 /* VF requests PF to set MAC addr */
+#define IXGBE_VF_SET_MULTICAST 0x03 /* VF requests PF to set MC addr */
+#define IXGBE_VF_SET_VLAN 0x04 /* VF requests PF to set VLAN */
+#define IXGBE_VF_SET_LPE 0x05 /* VF requests PF to set VMOLR.LPE */
+#define IXGBE_VF_SET_MACVLAN 0x06 /* VF requests PF for unicast filter */
+
+/* length of permanent address message returned from PF */
+#define IXGBE_VF_PERMADDR_MSG_LEN 4
+/* word in permanent address message with the current multicast type */
+#define IXGBE_VF_MC_TYPE_WORD 3
+
+#define IXGBE_PF_CONTROL_MSG 0x0100 /* PF control message */
+
+
+#define IXGBE_VF_MBX_INIT_TIMEOUT 2000 /* number of retries on mailbox */
+#define IXGBE_VF_MBX_INIT_DELAY 500 /* microseconds between retries */
+
+s32 ixgbe_read_mbx(struct ixgbe_hw *, u32 *, u16, u16);
+s32 ixgbe_write_mbx(struct ixgbe_hw *, u32 *, u16, u16);
+s32 ixgbe_read_posted_mbx(struct ixgbe_hw *, u32 *, u16, u16);
+s32 ixgbe_write_posted_mbx(struct ixgbe_hw *, u32 *, u16, u16);
+s32 ixgbe_check_for_msg(struct ixgbe_hw *, u16);
+s32 ixgbe_check_for_ack(struct ixgbe_hw *, u16);
+s32 ixgbe_check_for_rst(struct ixgbe_hw *, u16);
+void ixgbe_init_mbx_ops_generic(struct ixgbe_hw *hw);
+void ixgbe_init_mbx_params_vf(struct ixgbe_hw *);
+void ixgbe_init_mbx_params_pf(struct ixgbe_hw *);
+
+#endif /* _IXGBE_MBX_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_osdep.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_osdep.h
new file mode 100644
index 00000000..d161600b
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_osdep.h
@@ -0,0 +1,132 @@
+/*******************************************************************************
+
+ Intel 10 Gigabit PCI Express Linux driver
+ Copyright(c) 1999 - 2012 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+
+/* glue for the OS independent part of ixgbe
+ * includes register access macros
+ */
+
+#ifndef _IXGBE_OSDEP_H_
+#define _IXGBE_OSDEP_H_
+
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/sched.h>
+#include "kcompat.h"
+
+
+#ifndef msleep
+#define msleep(x) do { if (in_interrupt()) { \
+ /* Don't mdelay in interrupt context! */ \
+ BUG(); \
+ } else { \
+ msleep(x); \
+ } } while (0)
+
+#endif
+
+#undef ASSERT
+
+#ifdef DBG
+#define hw_dbg(hw, S, A...) printk(KERN_DEBUG S, ## A)
+#else
+#define hw_dbg(hw, S, A...) do {} while (0)
+#endif
+
+#define e_dev_info(format, arg...) \
+ dev_info(pci_dev_to_dev(adapter->pdev), format, ## arg)
+#define e_dev_warn(format, arg...) \
+ dev_warn(pci_dev_to_dev(adapter->pdev), format, ## arg)
+#define e_dev_err(format, arg...) \
+ dev_err(pci_dev_to_dev(adapter->pdev), format, ## arg)
+#define e_dev_notice(format, arg...) \
+ dev_notice(pci_dev_to_dev(adapter->pdev), format, ## arg)
+#define e_info(msglvl, format, arg...) \
+ netif_info(adapter, msglvl, adapter->netdev, format, ## arg)
+#define e_err(msglvl, format, arg...) \
+ netif_err(adapter, msglvl, adapter->netdev, format, ## arg)
+#define e_warn(msglvl, format, arg...) \
+ netif_warn(adapter, msglvl, adapter->netdev, format, ## arg)
+#define e_crit(msglvl, format, arg...) \
+ netif_crit(adapter, msglvl, adapter->netdev, format, ## arg)
+
+
+#ifdef DBG
+#define IXGBE_WRITE_REG(a, reg, value) do {\
+ switch (reg) { \
+ case IXGBE_EIMS: \
+ case IXGBE_EIMC: \
+ case IXGBE_EIAM: \
+ case IXGBE_EIAC: \
+ case IXGBE_EICR: \
+ case IXGBE_EICS: \
+ printk("%s: Reg - 0x%05X, value - 0x%08X\n", __func__, \
+ reg, (u32)(value)); \
+ default: \
+ break; \
+ } \
+ writel((value), ((a)->hw_addr + (reg))); \
+} while (0)
+#else
+#define IXGBE_WRITE_REG(a, reg, value) writel((value), ((a)->hw_addr + (reg)))
+#endif
+
+#define IXGBE_READ_REG(a, reg) readl((a)->hw_addr + (reg))
+
+#define IXGBE_WRITE_REG_ARRAY(a, reg, offset, value) ( \
+ writel((value), ((a)->hw_addr + (reg) + ((offset) << 2))))
+
+#define IXGBE_READ_REG_ARRAY(a, reg, offset) ( \
+ readl((a)->hw_addr + (reg) + ((offset) << 2)))
+
+#ifndef writeq
+#define writeq(val, addr) do { writel((u32) (val), addr); \
+ writel((u32) (val >> 32), (addr + 4)); \
+ } while (0);
+#endif
+
+#define IXGBE_WRITE_REG64(a, reg, value) writeq((value), ((a)->hw_addr + (reg)))
+
+#define IXGBE_WRITE_FLUSH(a) IXGBE_READ_REG(a, IXGBE_STATUS)
+struct ixgbe_hw;
+extern u16 ixgbe_read_pci_cfg_word(struct ixgbe_hw *hw, u32 reg);
+extern void ixgbe_write_pci_cfg_word(struct ixgbe_hw *hw, u32 reg, u16 value);
+extern void ewarn(struct ixgbe_hw *hw, const char *str, u32 status);
+
+#define IXGBE_READ_PCIE_WORD ixgbe_read_pci_cfg_word
+#define IXGBE_WRITE_PCIE_WORD ixgbe_write_pci_cfg_word
+#define IXGBE_EEPROM_GRANT_ATTEMPS 100
+#define IXGBE_HTONL(_i) htonl(_i)
+#define IXGBE_NTOHL(_i) ntohl(_i)
+#define IXGBE_NTOHS(_i) ntohs(_i)
+#define IXGBE_CPU_TO_LE32(_i) cpu_to_le32(_i)
+#define IXGBE_LE32_TO_CPUS(_i) le32_to_cpus(_i)
+#define EWARN(H, W, S) ewarn(H, W, S)
+
+#endif /* _IXGBE_OSDEP_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.c
new file mode 100644
index 00000000..e3f5275e
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.c
@@ -0,0 +1,1847 @@
+/*******************************************************************************
+
+ Intel 10 Gigabit PCI Express Linux driver
+ Copyright(c) 1999 - 2012 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#include "ixgbe_api.h"
+#include "ixgbe_common.h"
+#include "ixgbe_phy.h"
+
+static void ixgbe_i2c_start(struct ixgbe_hw *hw);
+static void ixgbe_i2c_stop(struct ixgbe_hw *hw);
+static s32 ixgbe_clock_in_i2c_byte(struct ixgbe_hw *hw, u8 *data);
+static s32 ixgbe_clock_out_i2c_byte(struct ixgbe_hw *hw, u8 data);
+static s32 ixgbe_get_i2c_ack(struct ixgbe_hw *hw);
+static s32 ixgbe_clock_in_i2c_bit(struct ixgbe_hw *hw, bool *data);
+static s32 ixgbe_clock_out_i2c_bit(struct ixgbe_hw *hw, bool data);
+static void ixgbe_raise_i2c_clk(struct ixgbe_hw *hw, u32 *i2cctl);
+static void ixgbe_lower_i2c_clk(struct ixgbe_hw *hw, u32 *i2cctl);
+static s32 ixgbe_set_i2c_data(struct ixgbe_hw *hw, u32 *i2cctl, bool data);
+static bool ixgbe_get_i2c_data(u32 *i2cctl);
+
+/**
+ * ixgbe_init_phy_ops_generic - Inits PHY function ptrs
+ * @hw: pointer to the hardware structure
+ *
+ * Initialize the function pointers.
+ **/
+s32 ixgbe_init_phy_ops_generic(struct ixgbe_hw *hw)
+{
+ struct ixgbe_phy_info *phy = &hw->phy;
+
+ /* PHY */
+ phy->ops.identify = &ixgbe_identify_phy_generic;
+ phy->ops.reset = &ixgbe_reset_phy_generic;
+ phy->ops.read_reg = &ixgbe_read_phy_reg_generic;
+ phy->ops.write_reg = &ixgbe_write_phy_reg_generic;
+ phy->ops.setup_link = &ixgbe_setup_phy_link_generic;
+ phy->ops.setup_link_speed = &ixgbe_setup_phy_link_speed_generic;
+ phy->ops.check_link = NULL;
+ phy->ops.get_firmware_version = ixgbe_get_phy_firmware_version_generic;
+ phy->ops.read_i2c_byte = &ixgbe_read_i2c_byte_generic;
+ phy->ops.write_i2c_byte = &ixgbe_write_i2c_byte_generic;
+ phy->ops.read_i2c_eeprom = &ixgbe_read_i2c_eeprom_generic;
+ phy->ops.write_i2c_eeprom = &ixgbe_write_i2c_eeprom_generic;
+ phy->ops.i2c_bus_clear = &ixgbe_i2c_bus_clear;
+ phy->ops.identify_sfp = &ixgbe_identify_module_generic;
+ phy->sfp_type = ixgbe_sfp_type_unknown;
+ phy->ops.check_overtemp = &ixgbe_tn_check_overtemp;
+ return 0;
+}
+
+/**
+ * ixgbe_identify_phy_generic - Get physical layer module
+ * @hw: pointer to hardware structure
+ *
+ * Determines the physical layer module found on the current adapter.
+ **/
+s32 ixgbe_identify_phy_generic(struct ixgbe_hw *hw)
+{
+ s32 status = IXGBE_ERR_PHY_ADDR_INVALID;
+ u32 phy_addr;
+ u16 ext_ability = 0;
+
+ if (hw->phy.type == ixgbe_phy_unknown) {
+ for (phy_addr = 0; phy_addr < IXGBE_MAX_PHY_ADDR; phy_addr++) {
+ if (ixgbe_validate_phy_addr(hw, phy_addr)) {
+ hw->phy.addr = phy_addr;
+ ixgbe_get_phy_id(hw);
+ hw->phy.type =
+ ixgbe_get_phy_type_from_id(hw->phy.id);
+
+ if (hw->phy.type == ixgbe_phy_unknown) {
+ hw->phy.ops.read_reg(hw,
+ IXGBE_MDIO_PHY_EXT_ABILITY,
+ IXGBE_MDIO_PMA_PMD_DEV_TYPE,
+ &ext_ability);
+ if (ext_ability &
+ (IXGBE_MDIO_PHY_10GBASET_ABILITY |
+ IXGBE_MDIO_PHY_1000BASET_ABILITY))
+ hw->phy.type =
+ ixgbe_phy_cu_unknown;
+ else
+ hw->phy.type =
+ ixgbe_phy_generic;
+ }
+
+ status = 0;
+ break;
+ }
+ }
+ /* clear value if nothing found */
+ if (status != 0)
+ hw->phy.addr = 0;
+ } else {
+ status = 0;
+ }
+
+ return status;
+}
+
+/**
+ * ixgbe_validate_phy_addr - Determines phy address is valid
+ * @hw: pointer to hardware structure
+ *
+ **/
+bool ixgbe_validate_phy_addr(struct ixgbe_hw *hw, u32 phy_addr)
+{
+ u16 phy_id = 0;
+ bool valid = false;
+
+ hw->phy.addr = phy_addr;
+ hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_ID_HIGH,
+ IXGBE_MDIO_PMA_PMD_DEV_TYPE, &phy_id);
+
+ if (phy_id != 0xFFFF && phy_id != 0x0)
+ valid = true;
+
+ return valid;
+}
+
+/**
+ * ixgbe_get_phy_id - Get the phy type
+ * @hw: pointer to hardware structure
+ *
+ **/
+s32 ixgbe_get_phy_id(struct ixgbe_hw *hw)
+{
+ u32 status;
+ u16 phy_id_high = 0;
+ u16 phy_id_low = 0;
+
+ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_ID_HIGH,
+ IXGBE_MDIO_PMA_PMD_DEV_TYPE,
+ &phy_id_high);
+
+ if (status == 0) {
+ hw->phy.id = (u32)(phy_id_high << 16);
+ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_ID_LOW,
+ IXGBE_MDIO_PMA_PMD_DEV_TYPE,
+ &phy_id_low);
+ hw->phy.id |= (u32)(phy_id_low & IXGBE_PHY_REVISION_MASK);
+ hw->phy.revision = (u32)(phy_id_low & ~IXGBE_PHY_REVISION_MASK);
+ }
+ return status;
+}
+
+/**
+ * ixgbe_get_phy_type_from_id - Get the phy type
+ * @hw: pointer to hardware structure
+ *
+ **/
+enum ixgbe_phy_type ixgbe_get_phy_type_from_id(u32 phy_id)
+{
+ enum ixgbe_phy_type phy_type;
+
+ switch (phy_id) {
+ case TN1010_PHY_ID:
+ phy_type = ixgbe_phy_tn;
+ break;
+ case X540_PHY_ID:
+ phy_type = ixgbe_phy_aq;
+ break;
+ case QT2022_PHY_ID:
+ phy_type = ixgbe_phy_qt;
+ break;
+ case ATH_PHY_ID:
+ phy_type = ixgbe_phy_nl;
+ break;
+ default:
+ phy_type = ixgbe_phy_unknown;
+ break;
+ }
+
+ hw_dbg(hw, "phy type found is %d\n", phy_type);
+ return phy_type;
+}
+
+/**
+ * ixgbe_reset_phy_generic - Performs a PHY reset
+ * @hw: pointer to hardware structure
+ **/
+s32 ixgbe_reset_phy_generic(struct ixgbe_hw *hw)
+{
+ u32 i;
+ u16 ctrl = 0;
+ s32 status = 0;
+
+ if (hw->phy.type == ixgbe_phy_unknown)
+ status = ixgbe_identify_phy_generic(hw);
+
+ if (status != 0 || hw->phy.type == ixgbe_phy_none)
+ goto out;
+
+ /* Don't reset PHY if it's shut down due to overtemp. */
+ if (!hw->phy.reset_if_overtemp &&
+ (IXGBE_ERR_OVERTEMP == hw->phy.ops.check_overtemp(hw)))
+ goto out;
+
+ /*
+ * Perform soft PHY reset to the PHY_XS.
+ * This will cause a soft reset to the PHY
+ */
+ hw->phy.ops.write_reg(hw, IXGBE_MDIO_PHY_XS_CONTROL,
+ IXGBE_MDIO_PHY_XS_DEV_TYPE,
+ IXGBE_MDIO_PHY_XS_RESET);
+
+ /*
+ * Poll for reset bit to self-clear indicating reset is complete.
+ * Some PHYs could take up to 3 seconds to complete and need about
+ * 1.7 usec delay after the reset is complete.
+ */
+ for (i = 0; i < 30; i++) {
+ msleep(100);
+ hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_XS_CONTROL,
+ IXGBE_MDIO_PHY_XS_DEV_TYPE, &ctrl);
+ if (!(ctrl & IXGBE_MDIO_PHY_XS_RESET)) {
+ udelay(2);
+ break;
+ }
+ }
+
+ if (ctrl & IXGBE_MDIO_PHY_XS_RESET) {
+ status = IXGBE_ERR_RESET_FAILED;
+ hw_dbg(hw, "PHY reset polling failed to complete.\n");
+ }
+
+out:
+ return status;
+}
+
+/**
+ * ixgbe_read_phy_reg_generic - Reads a value from a specified PHY register
+ * @hw: pointer to hardware structure
+ * @reg_addr: 32 bit address of PHY register to read
+ * @phy_data: Pointer to read data from PHY register
+ **/
+s32 ixgbe_read_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr,
+ u32 device_type, u16 *phy_data)
+{
+ u32 command;
+ u32 i;
+ u32 data;
+ s32 status = 0;
+ u16 gssr;
+
+ if (IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_LAN_ID_1)
+ gssr = IXGBE_GSSR_PHY1_SM;
+ else
+ gssr = IXGBE_GSSR_PHY0_SM;
+
+ if (hw->mac.ops.acquire_swfw_sync(hw, gssr) != 0)
+ status = IXGBE_ERR_SWFW_SYNC;
+
+ if (status == 0) {
+ /* Setup and write the address cycle command */
+ command = ((reg_addr << IXGBE_MSCA_NP_ADDR_SHIFT) |
+ (device_type << IXGBE_MSCA_DEV_TYPE_SHIFT) |
+ (hw->phy.addr << IXGBE_MSCA_PHY_ADDR_SHIFT) |
+ (IXGBE_MSCA_ADDR_CYCLE | IXGBE_MSCA_MDI_COMMAND));
+
+ IXGBE_WRITE_REG(hw, IXGBE_MSCA, command);
+
+ /*
+ * Check every 10 usec to see if the address cycle completed.
+ * The MDI Command bit will clear when the operation is
+ * complete
+ */
+ for (i = 0; i < IXGBE_MDIO_COMMAND_TIMEOUT; i++) {
+ udelay(10);
+
+ command = IXGBE_READ_REG(hw, IXGBE_MSCA);
+
+ if ((command & IXGBE_MSCA_MDI_COMMAND) == 0)
+ break;
+ }
+
+ if ((command & IXGBE_MSCA_MDI_COMMAND) != 0) {
+ hw_dbg(hw, "PHY address command did not complete.\n");
+ status = IXGBE_ERR_PHY;
+ }
+
+ if (status == 0) {
+ /*
+ * Address cycle complete, setup and write the read
+ * command
+ */
+ command = ((reg_addr << IXGBE_MSCA_NP_ADDR_SHIFT) |
+ (device_type << IXGBE_MSCA_DEV_TYPE_SHIFT) |
+ (hw->phy.addr << IXGBE_MSCA_PHY_ADDR_SHIFT) |
+ (IXGBE_MSCA_READ | IXGBE_MSCA_MDI_COMMAND));
+
+ IXGBE_WRITE_REG(hw, IXGBE_MSCA, command);
+
+ /*
+ * Check every 10 usec to see if the address cycle
+ * completed. The MDI Command bit will clear when the
+ * operation is complete
+ */
+ for (i = 0; i < IXGBE_MDIO_COMMAND_TIMEOUT; i++) {
+ udelay(10);
+
+ command = IXGBE_READ_REG(hw, IXGBE_MSCA);
+
+ if ((command & IXGBE_MSCA_MDI_COMMAND) == 0)
+ break;
+ }
+
+ if ((command & IXGBE_MSCA_MDI_COMMAND) != 0) {
+ hw_dbg(hw, "PHY read command didn't complete\n");
+ status = IXGBE_ERR_PHY;
+ } else {
+ /*
+ * Read operation is complete. Get the data
+ * from MSRWD
+ */
+ data = IXGBE_READ_REG(hw, IXGBE_MSRWD);
+ data >>= IXGBE_MSRWD_READ_DATA_SHIFT;
+ *phy_data = (u16)(data);
+ }
+ }
+
+ hw->mac.ops.release_swfw_sync(hw, gssr);
+ }
+
+ return status;
+}
+
+/**
+ * ixgbe_write_phy_reg_generic - Writes a value to specified PHY register
+ * @hw: pointer to hardware structure
+ * @reg_addr: 32 bit PHY register to write
+ * @device_type: 5 bit device type
+ * @phy_data: Data to write to the PHY register
+ **/
+s32 ixgbe_write_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr,
+ u32 device_type, u16 phy_data)
+{
+ u32 command;
+ u32 i;
+ s32 status = 0;
+ u16 gssr;
+
+ if (IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_LAN_ID_1)
+ gssr = IXGBE_GSSR_PHY1_SM;
+ else
+ gssr = IXGBE_GSSR_PHY0_SM;
+
+ if (hw->mac.ops.acquire_swfw_sync(hw, gssr) != 0)
+ status = IXGBE_ERR_SWFW_SYNC;
+
+ if (status == 0) {
+ /* Put the data in the MDI single read and write data register*/
+ IXGBE_WRITE_REG(hw, IXGBE_MSRWD, (u32)phy_data);
+
+ /* Setup and write the address cycle command */
+ command = ((reg_addr << IXGBE_MSCA_NP_ADDR_SHIFT) |
+ (device_type << IXGBE_MSCA_DEV_TYPE_SHIFT) |
+ (hw->phy.addr << IXGBE_MSCA_PHY_ADDR_SHIFT) |
+ (IXGBE_MSCA_ADDR_CYCLE | IXGBE_MSCA_MDI_COMMAND));
+
+ IXGBE_WRITE_REG(hw, IXGBE_MSCA, command);
+
+ /*
+ * Check every 10 usec to see if the address cycle completed.
+ * The MDI Command bit will clear when the operation is
+ * complete
+ */
+ for (i = 0; i < IXGBE_MDIO_COMMAND_TIMEOUT; i++) {
+ udelay(10);
+
+ command = IXGBE_READ_REG(hw, IXGBE_MSCA);
+
+ if ((command & IXGBE_MSCA_MDI_COMMAND) == 0)
+ break;
+ }
+
+ if ((command & IXGBE_MSCA_MDI_COMMAND) != 0) {
+ hw_dbg(hw, "PHY address cmd didn't complete\n");
+ status = IXGBE_ERR_PHY;
+ }
+
+ if (status == 0) {
+ /*
+ * Address cycle complete, setup and write the write
+ * command
+ */
+ command = ((reg_addr << IXGBE_MSCA_NP_ADDR_SHIFT) |
+ (device_type << IXGBE_MSCA_DEV_TYPE_SHIFT) |
+ (hw->phy.addr << IXGBE_MSCA_PHY_ADDR_SHIFT) |
+ (IXGBE_MSCA_WRITE | IXGBE_MSCA_MDI_COMMAND));
+
+ IXGBE_WRITE_REG(hw, IXGBE_MSCA, command);
+
+ /*
+ * Check every 10 usec to see if the address cycle
+ * completed. The MDI Command bit will clear when the
+ * operation is complete
+ */
+ for (i = 0; i < IXGBE_MDIO_COMMAND_TIMEOUT; i++) {
+ udelay(10);
+
+ command = IXGBE_READ_REG(hw, IXGBE_MSCA);
+
+ if ((command & IXGBE_MSCA_MDI_COMMAND) == 0)
+ break;
+ }
+
+ if ((command & IXGBE_MSCA_MDI_COMMAND) != 0) {
+ hw_dbg(hw, "PHY address cmd didn't complete\n");
+ status = IXGBE_ERR_PHY;
+ }
+ }
+
+ hw->mac.ops.release_swfw_sync(hw, gssr);
+ }
+
+ return status;
+}
+
+/**
+ * ixgbe_setup_phy_link_generic - Set and restart autoneg
+ * @hw: pointer to hardware structure
+ *
+ * Restart autonegotiation and PHY and waits for completion.
+ **/
+s32 ixgbe_setup_phy_link_generic(struct ixgbe_hw *hw)
+{
+ s32 status = 0;
+ u32 time_out;
+ u32 max_time_out = 10;
+ u16 autoneg_reg = IXGBE_MII_AUTONEG_REG;
+ bool autoneg = false;
+ ixgbe_link_speed speed;
+
+ ixgbe_get_copper_link_capabilities_generic(hw, &speed, &autoneg);
+
+ if (speed & IXGBE_LINK_SPEED_10GB_FULL) {
+ /* Set or unset auto-negotiation 10G advertisement */
+ hw->phy.ops.read_reg(hw, IXGBE_MII_10GBASE_T_AUTONEG_CTRL_REG,
+ IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+ &autoneg_reg);
+
+ autoneg_reg &= ~IXGBE_MII_10GBASE_T_ADVERTISE;
+ if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_10GB_FULL)
+ autoneg_reg |= IXGBE_MII_10GBASE_T_ADVERTISE;
+
+ hw->phy.ops.write_reg(hw, IXGBE_MII_10GBASE_T_AUTONEG_CTRL_REG,
+ IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+ autoneg_reg);
+ }
+
+ if (speed & IXGBE_LINK_SPEED_1GB_FULL) {
+ /* Set or unset auto-negotiation 1G advertisement */
+ hw->phy.ops.read_reg(hw,
+ IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG,
+ IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+ &autoneg_reg);
+
+ autoneg_reg &= ~IXGBE_MII_1GBASE_T_ADVERTISE;
+ if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_1GB_FULL)
+ autoneg_reg |= IXGBE_MII_1GBASE_T_ADVERTISE;
+
+ hw->phy.ops.write_reg(hw,
+ IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG,
+ IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+ autoneg_reg);
+ }
+
+ if (speed & IXGBE_LINK_SPEED_100_FULL) {
+ /* Set or unset auto-negotiation 100M advertisement */
+ hw->phy.ops.read_reg(hw, IXGBE_MII_AUTONEG_ADVERTISE_REG,
+ IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+ &autoneg_reg);
+
+ autoneg_reg &= ~(IXGBE_MII_100BASE_T_ADVERTISE |
+ IXGBE_MII_100BASE_T_ADVERTISE_HALF);
+ if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_100_FULL)
+ autoneg_reg |= IXGBE_MII_100BASE_T_ADVERTISE;
+
+ hw->phy.ops.write_reg(hw, IXGBE_MII_AUTONEG_ADVERTISE_REG,
+ IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+ autoneg_reg);
+ }
+
+ /* Restart PHY autonegotiation and wait for completion */
+ hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL,
+ IXGBE_MDIO_AUTO_NEG_DEV_TYPE, &autoneg_reg);
+
+ autoneg_reg |= IXGBE_MII_RESTART;
+
+ hw->phy.ops.write_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL,
+ IXGBE_MDIO_AUTO_NEG_DEV_TYPE, autoneg_reg);
+
+ /* Wait for autonegotiation to finish */
+ for (time_out = 0; time_out < max_time_out; time_out++) {
+ udelay(10);
+ /* Restart PHY autonegotiation and wait for completion */
+ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_STATUS,
+ IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+ &autoneg_reg);
+
+ autoneg_reg &= IXGBE_MII_AUTONEG_COMPLETE;
+ if (autoneg_reg == IXGBE_MII_AUTONEG_COMPLETE)
+ break;
+ }
+
+ if (time_out == max_time_out) {
+ status = IXGBE_ERR_LINK_SETUP;
+ hw_dbg(hw, "ixgbe_setup_phy_link_generic: time out");
+ }
+
+ return status;
+}
+
+/**
+ * ixgbe_setup_phy_link_speed_generic - Sets the auto advertised capabilities
+ * @hw: pointer to hardware structure
+ * @speed: new link speed
+ * @autoneg: true if autonegotiation enabled
+ **/
+s32 ixgbe_setup_phy_link_speed_generic(struct ixgbe_hw *hw,
+ ixgbe_link_speed speed,
+ bool autoneg,
+ bool autoneg_wait_to_complete)
+{
+
+ /*
+ * Clear autoneg_advertised and set new values based on input link
+ * speed.
+ */
+ hw->phy.autoneg_advertised = 0;
+
+ if (speed & IXGBE_LINK_SPEED_10GB_FULL)
+ hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_10GB_FULL;
+
+ if (speed & IXGBE_LINK_SPEED_1GB_FULL)
+ hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_1GB_FULL;
+
+ if (speed & IXGBE_LINK_SPEED_100_FULL)
+ hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_100_FULL;
+
+ /* Setup link based on the new speed settings */
+ hw->phy.ops.setup_link(hw);
+
+ return 0;
+}
+
+/**
+ * ixgbe_get_copper_link_capabilities_generic - Determines link capabilities
+ * @hw: pointer to hardware structure
+ * @speed: pointer to link speed
+ * @autoneg: boolean auto-negotiation value
+ *
+ * Determines the link capabilities by reading the AUTOC register.
+ **/
+s32 ixgbe_get_copper_link_capabilities_generic(struct ixgbe_hw *hw,
+ ixgbe_link_speed *speed,
+ bool *autoneg)
+{
+ s32 status = IXGBE_ERR_LINK_SETUP;
+ u16 speed_ability;
+
+ *speed = 0;
+ *autoneg = true;
+
+ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_SPEED_ABILITY,
+ IXGBE_MDIO_PMA_PMD_DEV_TYPE,
+ &speed_ability);
+
+ if (status == 0) {
+ if (speed_ability & IXGBE_MDIO_PHY_SPEED_10G)
+ *speed |= IXGBE_LINK_SPEED_10GB_FULL;
+ if (speed_ability & IXGBE_MDIO_PHY_SPEED_1G)
+ *speed |= IXGBE_LINK_SPEED_1GB_FULL;
+ if (speed_ability & IXGBE_MDIO_PHY_SPEED_100M)
+ *speed |= IXGBE_LINK_SPEED_100_FULL;
+ }
+
+ return status;
+}
+
+/**
+ * ixgbe_check_phy_link_tnx - Determine link and speed status
+ * @hw: pointer to hardware structure
+ *
+ * Reads the VS1 register to determine if link is up and the current speed for
+ * the PHY.
+ **/
+s32 ixgbe_check_phy_link_tnx(struct ixgbe_hw *hw, ixgbe_link_speed *speed,
+ bool *link_up)
+{
+ s32 status = 0;
+ u32 time_out;
+ u32 max_time_out = 10;
+ u16 phy_link = 0;
+ u16 phy_speed = 0;
+ u16 phy_data = 0;
+
+ /* Initialize speed and link to default case */
+ *link_up = false;
+ *speed = IXGBE_LINK_SPEED_10GB_FULL;
+
+ /*
+ * Check current speed and link status of the PHY register.
+ * This is a vendor specific register and may have to
+ * be changed for other copper PHYs.
+ */
+ for (time_out = 0; time_out < max_time_out; time_out++) {
+ udelay(10);
+ status = hw->phy.ops.read_reg(hw,
+ IXGBE_MDIO_VENDOR_SPECIFIC_1_STATUS,
+ IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
+ &phy_data);
+ phy_link = phy_data & IXGBE_MDIO_VENDOR_SPECIFIC_1_LINK_STATUS;
+ phy_speed = phy_data &
+ IXGBE_MDIO_VENDOR_SPECIFIC_1_SPEED_STATUS;
+ if (phy_link == IXGBE_MDIO_VENDOR_SPECIFIC_1_LINK_STATUS) {
+ *link_up = true;
+ if (phy_speed ==
+ IXGBE_MDIO_VENDOR_SPECIFIC_1_SPEED_STATUS)
+ *speed = IXGBE_LINK_SPEED_1GB_FULL;
+ break;
+ }
+ }
+
+ return status;
+}
+
+/**
+ * ixgbe_setup_phy_link_tnx - Set and restart autoneg
+ * @hw: pointer to hardware structure
+ *
+ * Restart autonegotiation and PHY and waits for completion.
+ **/
+s32 ixgbe_setup_phy_link_tnx(struct ixgbe_hw *hw)
+{
+ s32 status = 0;
+ u32 time_out;
+ u32 max_time_out = 10;
+ u16 autoneg_reg = IXGBE_MII_AUTONEG_REG;
+ bool autoneg = false;
+ ixgbe_link_speed speed;
+
+ ixgbe_get_copper_link_capabilities_generic(hw, &speed, &autoneg);
+
+ if (speed & IXGBE_LINK_SPEED_10GB_FULL) {
+ /* Set or unset auto-negotiation 10G advertisement */
+ hw->phy.ops.read_reg(hw, IXGBE_MII_10GBASE_T_AUTONEG_CTRL_REG,
+ IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+ &autoneg_reg);
+
+ autoneg_reg &= ~IXGBE_MII_10GBASE_T_ADVERTISE;
+ if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_10GB_FULL)
+ autoneg_reg |= IXGBE_MII_10GBASE_T_ADVERTISE;
+
+ hw->phy.ops.write_reg(hw, IXGBE_MII_10GBASE_T_AUTONEG_CTRL_REG,
+ IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+ autoneg_reg);
+ }
+
+ if (speed & IXGBE_LINK_SPEED_1GB_FULL) {
+ /* Set or unset auto-negotiation 1G advertisement */
+ hw->phy.ops.read_reg(hw, IXGBE_MII_AUTONEG_XNP_TX_REG,
+ IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+ &autoneg_reg);
+
+ autoneg_reg &= ~IXGBE_MII_1GBASE_T_ADVERTISE_XNP_TX;
+ if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_1GB_FULL)
+ autoneg_reg |= IXGBE_MII_1GBASE_T_ADVERTISE_XNP_TX;
+
+ hw->phy.ops.write_reg(hw, IXGBE_MII_AUTONEG_XNP_TX_REG,
+ IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+ autoneg_reg);
+ }
+
+ if (speed & IXGBE_LINK_SPEED_100_FULL) {
+ /* Set or unset auto-negotiation 100M advertisement */
+ hw->phy.ops.read_reg(hw, IXGBE_MII_AUTONEG_ADVERTISE_REG,
+ IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+ &autoneg_reg);
+
+ autoneg_reg &= ~IXGBE_MII_100BASE_T_ADVERTISE;
+ if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_100_FULL)
+ autoneg_reg |= IXGBE_MII_100BASE_T_ADVERTISE;
+
+ hw->phy.ops.write_reg(hw, IXGBE_MII_AUTONEG_ADVERTISE_REG,
+ IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+ autoneg_reg);
+ }
+
+ /* Restart PHY autonegotiation and wait for completion */
+ hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL,
+ IXGBE_MDIO_AUTO_NEG_DEV_TYPE, &autoneg_reg);
+
+ autoneg_reg |= IXGBE_MII_RESTART;
+
+ hw->phy.ops.write_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL,
+ IXGBE_MDIO_AUTO_NEG_DEV_TYPE, autoneg_reg);
+
+ /* Wait for autonegotiation to finish */
+ for (time_out = 0; time_out < max_time_out; time_out++) {
+ udelay(10);
+ /* Restart PHY autonegotiation and wait for completion */
+ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_STATUS,
+ IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+ &autoneg_reg);
+
+ autoneg_reg &= IXGBE_MII_AUTONEG_COMPLETE;
+ if (autoneg_reg == IXGBE_MII_AUTONEG_COMPLETE)
+ break;
+ }
+
+ if (time_out == max_time_out) {
+ status = IXGBE_ERR_LINK_SETUP;
+ hw_dbg(hw, "ixgbe_setup_phy_link_tnx: time out");
+ }
+
+ return status;
+}
+
+/**
+ * ixgbe_get_phy_firmware_version_tnx - Gets the PHY Firmware Version
+ * @hw: pointer to hardware structure
+ * @firmware_version: pointer to the PHY Firmware Version
+ **/
+s32 ixgbe_get_phy_firmware_version_tnx(struct ixgbe_hw *hw,
+ u16 *firmware_version)
+{
+ s32 status = 0;
+
+ status = hw->phy.ops.read_reg(hw, TNX_FW_REV,
+ IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
+ firmware_version);
+
+ return status;
+}
+
+/**
+ * ixgbe_get_phy_firmware_version_generic - Gets the PHY Firmware Version
+ * @hw: pointer to hardware structure
+ * @firmware_version: pointer to the PHY Firmware Version
+ **/
+s32 ixgbe_get_phy_firmware_version_generic(struct ixgbe_hw *hw,
+ u16 *firmware_version)
+{
+ s32 status = 0;
+
+ status = hw->phy.ops.read_reg(hw, AQ_FW_REV,
+ IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
+ firmware_version);
+
+ return status;
+}
+
+/**
+ * ixgbe_reset_phy_nl - Performs a PHY reset
+ * @hw: pointer to hardware structure
+ **/
+s32 ixgbe_reset_phy_nl(struct ixgbe_hw *hw)
+{
+ u16 phy_offset, control, eword, edata, block_crc;
+ bool end_data = false;
+ u16 list_offset, data_offset;
+ u16 phy_data = 0;
+ s32 ret_val = 0;
+ u32 i;
+
+ hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_XS_CONTROL,
+ IXGBE_MDIO_PHY_XS_DEV_TYPE, &phy_data);
+
+ /* reset the PHY and poll for completion */
+ hw->phy.ops.write_reg(hw, IXGBE_MDIO_PHY_XS_CONTROL,
+ IXGBE_MDIO_PHY_XS_DEV_TYPE,
+ (phy_data | IXGBE_MDIO_PHY_XS_RESET));
+
+ for (i = 0; i < 100; i++) {
+ hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_XS_CONTROL,
+ IXGBE_MDIO_PHY_XS_DEV_TYPE, &phy_data);
+ if ((phy_data & IXGBE_MDIO_PHY_XS_RESET) == 0)
+ break;
+ msleep(10);
+ }
+
+ if ((phy_data & IXGBE_MDIO_PHY_XS_RESET) != 0) {
+ hw_dbg(hw, "PHY reset did not complete.\n");
+ ret_val = IXGBE_ERR_PHY;
+ goto out;
+ }
+
+ /* Get init offsets */
+ ret_val = ixgbe_get_sfp_init_sequence_offsets(hw, &list_offset,
+ &data_offset);
+ if (ret_val != 0)
+ goto out;
+
+ ret_val = hw->eeprom.ops.read(hw, data_offset, &block_crc);
+ data_offset++;
+ while (!end_data) {
+ /*
+ * Read control word from PHY init contents offset
+ */
+ ret_val = hw->eeprom.ops.read(hw, data_offset, &eword);
+ control = (eword & IXGBE_CONTROL_MASK_NL) >>
+ IXGBE_CONTROL_SHIFT_NL;
+ edata = eword & IXGBE_DATA_MASK_NL;
+ switch (control) {
+ case IXGBE_DELAY_NL:
+ data_offset++;
+ hw_dbg(hw, "DELAY: %d MS\n", edata);
+ msleep(edata);
+ break;
+ case IXGBE_DATA_NL:
+ hw_dbg(hw, "DATA:\n");
+ data_offset++;
+ hw->eeprom.ops.read(hw, data_offset++,
+ &phy_offset);
+ for (i = 0; i < edata; i++) {
+ hw->eeprom.ops.read(hw, data_offset, &eword);
+ hw->phy.ops.write_reg(hw, phy_offset,
+ IXGBE_TWINAX_DEV, eword);
+ hw_dbg(hw, "Wrote %4.4x to %4.4x\n", eword,
+ phy_offset);
+ data_offset++;
+ phy_offset++;
+ }
+ break;
+ case IXGBE_CONTROL_NL:
+ data_offset++;
+ hw_dbg(hw, "CONTROL:\n");
+ if (edata == IXGBE_CONTROL_EOL_NL) {
+ hw_dbg(hw, "EOL\n");
+ end_data = true;
+ } else if (edata == IXGBE_CONTROL_SOL_NL) {
+ hw_dbg(hw, "SOL\n");
+ } else {
+ hw_dbg(hw, "Bad control value\n");
+ ret_val = IXGBE_ERR_PHY;
+ goto out;
+ }
+ break;
+ default:
+ hw_dbg(hw, "Bad control type\n");
+ ret_val = IXGBE_ERR_PHY;
+ goto out;
+ }
+ }
+
+out:
+ return ret_val;
+}
+
+/**
+ * ixgbe_identify_module_generic - Identifies module type
+ * @hw: pointer to hardware structure
+ *
+ * Determines HW type and calls appropriate function.
+ **/
+s32 ixgbe_identify_module_generic(struct ixgbe_hw *hw)
+{
+ s32 status = IXGBE_ERR_SFP_NOT_PRESENT;
+
+ switch (hw->mac.ops.get_media_type(hw)) {
+ case ixgbe_media_type_fiber:
+ status = ixgbe_identify_sfp_module_generic(hw);
+ break;
+
+ case ixgbe_media_type_fiber_qsfp:
+ status = ixgbe_identify_qsfp_module_generic(hw);
+ break;
+
+ default:
+ hw->phy.sfp_type = ixgbe_sfp_type_not_present;
+ status = IXGBE_ERR_SFP_NOT_PRESENT;
+ break;
+ }
+
+ return status;
+}
+
+/**
+ * ixgbe_identify_sfp_module_generic - Identifies SFP modules
+ * @hw: pointer to hardware structure
+ *
+ * Searches for and identifies the SFP module and assigns appropriate PHY type.
+ **/
+s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw)
+{
+ s32 status = IXGBE_ERR_PHY_ADDR_INVALID;
+ u32 vendor_oui = 0;
+ enum ixgbe_sfp_type stored_sfp_type = hw->phy.sfp_type;
+ u8 identifier = 0;
+ u8 comp_codes_1g = 0;
+ u8 comp_codes_10g = 0;
+ u8 oui_bytes[3] = {0, 0, 0};
+ u8 cable_tech = 0;
+ u8 cable_spec = 0;
+ u16 enforce_sfp = 0;
+
+ if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_fiber) {
+ hw->phy.sfp_type = ixgbe_sfp_type_not_present;
+ status = IXGBE_ERR_SFP_NOT_PRESENT;
+ goto out;
+ }
+
+ status = hw->phy.ops.read_i2c_eeprom(hw,
+ IXGBE_SFF_IDENTIFIER,
+ &identifier);
+
+ if (status == IXGBE_ERR_SWFW_SYNC ||
+ status == IXGBE_ERR_I2C ||
+ status == IXGBE_ERR_SFP_NOT_PRESENT)
+ goto err_read_i2c_eeprom;
+
+ /* LAN ID is needed for sfp_type determination */
+ hw->mac.ops.set_lan_id(hw);
+
+ if (identifier != IXGBE_SFF_IDENTIFIER_SFP) {
+ hw->phy.type = ixgbe_phy_sfp_unsupported;
+ status = IXGBE_ERR_SFP_NOT_SUPPORTED;
+ } else {
+ status = hw->phy.ops.read_i2c_eeprom(hw,
+ IXGBE_SFF_1GBE_COMP_CODES,
+ &comp_codes_1g);
+
+ if (status == IXGBE_ERR_SWFW_SYNC ||
+ status == IXGBE_ERR_I2C ||
+ status == IXGBE_ERR_SFP_NOT_PRESENT)
+ goto err_read_i2c_eeprom;
+
+ status = hw->phy.ops.read_i2c_eeprom(hw,
+ IXGBE_SFF_10GBE_COMP_CODES,
+ &comp_codes_10g);
+
+ if (status == IXGBE_ERR_SWFW_SYNC ||
+ status == IXGBE_ERR_I2C ||
+ status == IXGBE_ERR_SFP_NOT_PRESENT)
+ goto err_read_i2c_eeprom;
+ status = hw->phy.ops.read_i2c_eeprom(hw,
+ IXGBE_SFF_CABLE_TECHNOLOGY,
+ &cable_tech);
+
+ if (status == IXGBE_ERR_SWFW_SYNC ||
+ status == IXGBE_ERR_I2C ||
+ status == IXGBE_ERR_SFP_NOT_PRESENT)
+ goto err_read_i2c_eeprom;
+
+ /* ID Module
+ * =========
+ * 0 SFP_DA_CU
+ * 1 SFP_SR
+ * 2 SFP_LR
+ * 3 SFP_DA_CORE0 - 82599-specific
+ * 4 SFP_DA_CORE1 - 82599-specific
+ * 5 SFP_SR/LR_CORE0 - 82599-specific
+ * 6 SFP_SR/LR_CORE1 - 82599-specific
+ * 7 SFP_act_lmt_DA_CORE0 - 82599-specific
+ * 8 SFP_act_lmt_DA_CORE1 - 82599-specific
+ * 9 SFP_1g_cu_CORE0 - 82599-specific
+ * 10 SFP_1g_cu_CORE1 - 82599-specific
+ * 11 SFP_1g_sx_CORE0 - 82599-specific
+ * 12 SFP_1g_sx_CORE1 - 82599-specific
+ */
+ if (hw->mac.type == ixgbe_mac_82598EB) {
+ if (cable_tech & IXGBE_SFF_DA_PASSIVE_CABLE)
+ hw->phy.sfp_type = ixgbe_sfp_type_da_cu;
+ else if (comp_codes_10g & IXGBE_SFF_10GBASESR_CAPABLE)
+ hw->phy.sfp_type = ixgbe_sfp_type_sr;
+ else if (comp_codes_10g & IXGBE_SFF_10GBASELR_CAPABLE)
+ hw->phy.sfp_type = ixgbe_sfp_type_lr;
+ else
+ hw->phy.sfp_type = ixgbe_sfp_type_unknown;
+ } else if (hw->mac.type == ixgbe_mac_82599EB) {
+ if (cable_tech & IXGBE_SFF_DA_PASSIVE_CABLE) {
+ if (hw->bus.lan_id == 0)
+ hw->phy.sfp_type =
+ ixgbe_sfp_type_da_cu_core0;
+ else
+ hw->phy.sfp_type =
+ ixgbe_sfp_type_da_cu_core1;
+ } else if (cable_tech & IXGBE_SFF_DA_ACTIVE_CABLE) {
+ hw->phy.ops.read_i2c_eeprom(
+ hw, IXGBE_SFF_CABLE_SPEC_COMP,
+ &cable_spec);
+ if (cable_spec &
+ IXGBE_SFF_DA_SPEC_ACTIVE_LIMITING) {
+ if (hw->bus.lan_id == 0)
+ hw->phy.sfp_type =
+ ixgbe_sfp_type_da_act_lmt_core0;
+ else
+ hw->phy.sfp_type =
+ ixgbe_sfp_type_da_act_lmt_core1;
+ } else {
+ hw->phy.sfp_type =
+ ixgbe_sfp_type_unknown;
+ }
+ } else if (comp_codes_10g &
+ (IXGBE_SFF_10GBASESR_CAPABLE |
+ IXGBE_SFF_10GBASELR_CAPABLE)) {
+ if (hw->bus.lan_id == 0)
+ hw->phy.sfp_type =
+ ixgbe_sfp_type_srlr_core0;
+ else
+ hw->phy.sfp_type =
+ ixgbe_sfp_type_srlr_core1;
+ } else if (comp_codes_1g & IXGBE_SFF_1GBASET_CAPABLE) {
+ if (hw->bus.lan_id == 0)
+ hw->phy.sfp_type =
+ ixgbe_sfp_type_1g_cu_core0;
+ else
+ hw->phy.sfp_type =
+ ixgbe_sfp_type_1g_cu_core1;
+ } else if (comp_codes_1g & IXGBE_SFF_1GBASESX_CAPABLE) {
+ if (hw->bus.lan_id == 0)
+ hw->phy.sfp_type =
+ ixgbe_sfp_type_1g_sx_core0;
+ else
+ hw->phy.sfp_type =
+ ixgbe_sfp_type_1g_sx_core1;
+ } else {
+ hw->phy.sfp_type = ixgbe_sfp_type_unknown;
+ }
+ }
+
+ if (hw->phy.sfp_type != stored_sfp_type)
+ hw->phy.sfp_setup_needed = true;
+
+ /* Determine if the SFP+ PHY is dual speed or not. */
+ hw->phy.multispeed_fiber = false;
+ if (((comp_codes_1g & IXGBE_SFF_1GBASESX_CAPABLE) &&
+ (comp_codes_10g & IXGBE_SFF_10GBASESR_CAPABLE)) ||
+ ((comp_codes_1g & IXGBE_SFF_1GBASELX_CAPABLE) &&
+ (comp_codes_10g & IXGBE_SFF_10GBASELR_CAPABLE)))
+ hw->phy.multispeed_fiber = true;
+
+ /* Determine PHY vendor */
+ if (hw->phy.type != ixgbe_phy_nl) {
+ hw->phy.id = identifier;
+ status = hw->phy.ops.read_i2c_eeprom(hw,
+ IXGBE_SFF_VENDOR_OUI_BYTE0,
+ &oui_bytes[0]);
+
+ if (status == IXGBE_ERR_SWFW_SYNC ||
+ status == IXGBE_ERR_I2C ||
+ status == IXGBE_ERR_SFP_NOT_PRESENT)
+ goto err_read_i2c_eeprom;
+
+ status = hw->phy.ops.read_i2c_eeprom(hw,
+ IXGBE_SFF_VENDOR_OUI_BYTE1,
+ &oui_bytes[1]);
+
+ if (status == IXGBE_ERR_SWFW_SYNC ||
+ status == IXGBE_ERR_I2C ||
+ status == IXGBE_ERR_SFP_NOT_PRESENT)
+ goto err_read_i2c_eeprom;
+
+ status = hw->phy.ops.read_i2c_eeprom(hw,
+ IXGBE_SFF_VENDOR_OUI_BYTE2,
+ &oui_bytes[2]);
+
+ if (status == IXGBE_ERR_SWFW_SYNC ||
+ status == IXGBE_ERR_I2C ||
+ status == IXGBE_ERR_SFP_NOT_PRESENT)
+ goto err_read_i2c_eeprom;
+
+ vendor_oui =
+ ((oui_bytes[0] << IXGBE_SFF_VENDOR_OUI_BYTE0_SHIFT) |
+ (oui_bytes[1] << IXGBE_SFF_VENDOR_OUI_BYTE1_SHIFT) |
+ (oui_bytes[2] << IXGBE_SFF_VENDOR_OUI_BYTE2_SHIFT));
+
+ switch (vendor_oui) {
+ case IXGBE_SFF_VENDOR_OUI_TYCO:
+ if (cable_tech & IXGBE_SFF_DA_PASSIVE_CABLE)
+ hw->phy.type =
+ ixgbe_phy_sfp_passive_tyco;
+ break;
+ case IXGBE_SFF_VENDOR_OUI_FTL:
+ if (cable_tech & IXGBE_SFF_DA_ACTIVE_CABLE)
+ hw->phy.type = ixgbe_phy_sfp_ftl_active;
+ else
+ hw->phy.type = ixgbe_phy_sfp_ftl;
+ break;
+ case IXGBE_SFF_VENDOR_OUI_AVAGO:
+ hw->phy.type = ixgbe_phy_sfp_avago;
+ break;
+ case IXGBE_SFF_VENDOR_OUI_INTEL:
+ hw->phy.type = ixgbe_phy_sfp_intel;
+ break;
+ default:
+ if (cable_tech & IXGBE_SFF_DA_PASSIVE_CABLE)
+ hw->phy.type =
+ ixgbe_phy_sfp_passive_unknown;
+ else if (cable_tech & IXGBE_SFF_DA_ACTIVE_CABLE)
+ hw->phy.type =
+ ixgbe_phy_sfp_active_unknown;
+ else
+ hw->phy.type = ixgbe_phy_sfp_unknown;
+ break;
+ }
+ }
+
+ /* Allow any DA cable vendor */
+ if (cable_tech & (IXGBE_SFF_DA_PASSIVE_CABLE |
+ IXGBE_SFF_DA_ACTIVE_CABLE)) {
+ status = 0;
+ goto out;
+ }
+
+ /* Verify supported 1G SFP modules */
+ if (comp_codes_10g == 0 &&
+ !(hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core1 ||
+ hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core0 ||
+ hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core0 ||
+ hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1)) {
+ hw->phy.type = ixgbe_phy_sfp_unsupported;
+ status = IXGBE_ERR_SFP_NOT_SUPPORTED;
+ goto out;
+ }
+
+ /* Anything else 82598-based is supported */
+ if (hw->mac.type == ixgbe_mac_82598EB) {
+ status = 0;
+ goto out;
+ }
+
+ ixgbe_get_device_caps(hw, &enforce_sfp);
+ if (!(enforce_sfp & IXGBE_DEVICE_CAPS_ALLOW_ANY_SFP) &&
+ !((hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core0) ||
+ (hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core1) ||
+ (hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core0) ||
+ (hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1))) {
+ /* Make sure we're a supported PHY type */
+ if (hw->phy.type == ixgbe_phy_sfp_intel) {
+ status = 0;
+ } else {
+ if (hw->allow_unsupported_sfp == true) {
+ EWARN(hw, "WARNING: Intel (R) Network "
+ "Connections are quality tested "
+ "using Intel (R) Ethernet Optics."
+ " Using untested modules is not "
+ "supported and may cause unstable"
+ " operation or damage to the "
+ "module or the adapter. Intel "
+ "Corporation is not responsible "
+ "for any harm caused by using "
+ "untested modules.\n", status);
+ status = 0;
+ } else {
+ hw_dbg(hw, "SFP+ module not supported\n");
+ hw->phy.type =
+ ixgbe_phy_sfp_unsupported;
+ status = IXGBE_ERR_SFP_NOT_SUPPORTED;
+ }
+ }
+ } else {
+ status = 0;
+ }
+ }
+
+out:
+ return status;
+
+err_read_i2c_eeprom:
+ hw->phy.sfp_type = ixgbe_sfp_type_not_present;
+ if (hw->phy.type != ixgbe_phy_nl) {
+ hw->phy.id = 0;
+ hw->phy.type = ixgbe_phy_unknown;
+ }
+ return IXGBE_ERR_SFP_NOT_PRESENT;
+}
+
+/**
+ * ixgbe_identify_qsfp_module_generic - Identifies QSFP modules
+ * @hw: pointer to hardware structure
+ *
+ * Searches for and identifies the QSFP module and assigns appropriate PHY type
+ **/
+s32 ixgbe_identify_qsfp_module_generic(struct ixgbe_hw *hw)
+{
+ s32 status = 0;
+
+ if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_fiber_qsfp) {
+ hw->phy.sfp_type = ixgbe_sfp_type_not_present;
+ status = IXGBE_ERR_SFP_NOT_PRESENT;
+ }
+
+ return status;
+}
+
+
+/**
+ * ixgbe_get_sfp_init_sequence_offsets - Provides offset of PHY init sequence
+ * @hw: pointer to hardware structure
+ * @list_offset: offset to the SFP ID list
+ * @data_offset: offset to the SFP data block
+ *
+ * Checks the MAC's EEPROM to see if it supports a given SFP+ module type, if
+ * so it returns the offsets to the phy init sequence block.
+ **/
+s32 ixgbe_get_sfp_init_sequence_offsets(struct ixgbe_hw *hw,
+ u16 *list_offset,
+ u16 *data_offset)
+{
+ u16 sfp_id;
+ u16 sfp_type = hw->phy.sfp_type;
+
+ if (hw->phy.sfp_type == ixgbe_sfp_type_unknown)
+ return IXGBE_ERR_SFP_NOT_SUPPORTED;
+
+ if (hw->phy.sfp_type == ixgbe_sfp_type_not_present)
+ return IXGBE_ERR_SFP_NOT_PRESENT;
+
+ if ((hw->device_id == IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM) &&
+ (hw->phy.sfp_type == ixgbe_sfp_type_da_cu))
+ return IXGBE_ERR_SFP_NOT_SUPPORTED;
+
+ /*
+ * Limiting active cables and 1G Phys must be initialized as
+ * SR modules
+ */
+ if (sfp_type == ixgbe_sfp_type_da_act_lmt_core0 ||
+ sfp_type == ixgbe_sfp_type_1g_cu_core0 ||
+ sfp_type == ixgbe_sfp_type_1g_sx_core0)
+ sfp_type = ixgbe_sfp_type_srlr_core0;
+ else if (sfp_type == ixgbe_sfp_type_da_act_lmt_core1 ||
+ sfp_type == ixgbe_sfp_type_1g_cu_core1 ||
+ sfp_type == ixgbe_sfp_type_1g_sx_core1)
+ sfp_type = ixgbe_sfp_type_srlr_core1;
+
+ /* Read offset to PHY init contents */
+ hw->eeprom.ops.read(hw, IXGBE_PHY_INIT_OFFSET_NL, list_offset);
+
+ if ((!*list_offset) || (*list_offset == 0xFFFF))
+ return IXGBE_ERR_SFP_NO_INIT_SEQ_PRESENT;
+
+ /* Shift offset to first ID word */
+ (*list_offset)++;
+
+ /*
+ * Find the matching SFP ID in the EEPROM
+ * and program the init sequence
+ */
+ hw->eeprom.ops.read(hw, *list_offset, &sfp_id);
+
+ while (sfp_id != IXGBE_PHY_INIT_END_NL) {
+ if (sfp_id == sfp_type) {
+ (*list_offset)++;
+ hw->eeprom.ops.read(hw, *list_offset, data_offset);
+ if ((!*data_offset) || (*data_offset == 0xFFFF)) {
+ hw_dbg(hw, "SFP+ module not supported\n");
+ return IXGBE_ERR_SFP_NOT_SUPPORTED;
+ } else {
+ break;
+ }
+ } else {
+ (*list_offset) += 2;
+ if (hw->eeprom.ops.read(hw, *list_offset, &sfp_id))
+ return IXGBE_ERR_PHY;
+ }
+ }
+
+ if (sfp_id == IXGBE_PHY_INIT_END_NL) {
+ hw_dbg(hw, "No matching SFP+ module found\n");
+ return IXGBE_ERR_SFP_NOT_SUPPORTED;
+ }
+
+ return 0;
+}
+
+/**
+ * ixgbe_read_i2c_eeprom_generic - Reads 8 bit EEPROM word over I2C interface
+ * @hw: pointer to hardware structure
+ * @byte_offset: EEPROM byte offset to read
+ * @eeprom_data: value read
+ *
+ * Performs byte read operation to SFP module's EEPROM over I2C interface.
+ **/
+s32 ixgbe_read_i2c_eeprom_generic(struct ixgbe_hw *hw, u8 byte_offset,
+ u8 *eeprom_data)
+{
+ return hw->phy.ops.read_i2c_byte(hw, byte_offset,
+ IXGBE_I2C_EEPROM_DEV_ADDR,
+ eeprom_data);
+}
+
+/**
+ * ixgbe_write_i2c_eeprom_generic - Writes 8 bit EEPROM word over I2C interface
+ * @hw: pointer to hardware structure
+ * @byte_offset: EEPROM byte offset to write
+ * @eeprom_data: value to write
+ *
+ * Performs byte write operation to SFP module's EEPROM over I2C interface.
+ **/
+s32 ixgbe_write_i2c_eeprom_generic(struct ixgbe_hw *hw, u8 byte_offset,
+ u8 eeprom_data)
+{
+ return hw->phy.ops.write_i2c_byte(hw, byte_offset,
+ IXGBE_I2C_EEPROM_DEV_ADDR,
+ eeprom_data);
+}
+
+/**
+ * ixgbe_read_i2c_byte_generic - Reads 8 bit word over I2C
+ * @hw: pointer to hardware structure
+ * @byte_offset: byte offset to read
+ * @data: value read
+ *
+ * Performs byte read operation to SFP module's EEPROM over I2C interface at
+ * a specified device address.
+ **/
+s32 ixgbe_read_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset,
+ u8 dev_addr, u8 *data)
+{
+ s32 status = 0;
+ u32 max_retry = 10;
+ u32 retry = 0;
+ u16 swfw_mask = 0;
+ bool nack = 1;
+ *data = 0;
+
+ if (IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_LAN_ID_1)
+ swfw_mask = IXGBE_GSSR_PHY1_SM;
+ else
+ swfw_mask = IXGBE_GSSR_PHY0_SM;
+
+ do {
+ if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask)
+ != 0) {
+ status = IXGBE_ERR_SWFW_SYNC;
+ goto read_byte_out;
+ }
+
+ ixgbe_i2c_start(hw);
+
+ /* Device Address and write indication */
+ status = ixgbe_clock_out_i2c_byte(hw, dev_addr);
+ if (status != 0)
+ goto fail;
+
+ status = ixgbe_get_i2c_ack(hw);
+ if (status != 0)
+ goto fail;
+
+ status = ixgbe_clock_out_i2c_byte(hw, byte_offset);
+ if (status != 0)
+ goto fail;
+
+ status = ixgbe_get_i2c_ack(hw);
+ if (status != 0)
+ goto fail;
+
+ ixgbe_i2c_start(hw);
+
+ /* Device Address and read indication */
+ status = ixgbe_clock_out_i2c_byte(hw, (dev_addr | 0x1));
+ if (status != 0)
+ goto fail;
+
+ status = ixgbe_get_i2c_ack(hw);
+ if (status != 0)
+ goto fail;
+
+ status = ixgbe_clock_in_i2c_byte(hw, data);
+ if (status != 0)
+ goto fail;
+
+ status = ixgbe_clock_out_i2c_bit(hw, nack);
+ if (status != 0)
+ goto fail;
+
+ ixgbe_i2c_stop(hw);
+ break;
+
+fail:
+ hw->mac.ops.release_swfw_sync(hw, swfw_mask);
+ msleep(100);
+ ixgbe_i2c_bus_clear(hw);
+ retry++;
+ if (retry < max_retry)
+ hw_dbg(hw, "I2C byte read error - Retrying.\n");
+ else
+ hw_dbg(hw, "I2C byte read error.\n");
+
+ } while (retry < max_retry);
+
+ hw->mac.ops.release_swfw_sync(hw, swfw_mask);
+
+read_byte_out:
+ return status;
+}
+
+/**
+ * ixgbe_write_i2c_byte_generic - Writes 8 bit word over I2C
+ * @hw: pointer to hardware structure
+ * @byte_offset: byte offset to write
+ * @data: value to write
+ *
+ * Performs byte write operation to SFP module's EEPROM over I2C interface at
+ * a specified device address.
+ **/
+s32 ixgbe_write_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset,
+ u8 dev_addr, u8 data)
+{
+ s32 status = 0;
+ u32 max_retry = 1;
+ u32 retry = 0;
+ u16 swfw_mask = 0;
+
+ if (IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_LAN_ID_1)
+ swfw_mask = IXGBE_GSSR_PHY1_SM;
+ else
+ swfw_mask = IXGBE_GSSR_PHY0_SM;
+
+ if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask) != 0) {
+ status = IXGBE_ERR_SWFW_SYNC;
+ goto write_byte_out;
+ }
+
+ do {
+ ixgbe_i2c_start(hw);
+
+ status = ixgbe_clock_out_i2c_byte(hw, dev_addr);
+ if (status != 0)
+ goto fail;
+
+ status = ixgbe_get_i2c_ack(hw);
+ if (status != 0)
+ goto fail;
+
+ status = ixgbe_clock_out_i2c_byte(hw, byte_offset);
+ if (status != 0)
+ goto fail;
+
+ status = ixgbe_get_i2c_ack(hw);
+ if (status != 0)
+ goto fail;
+
+ status = ixgbe_clock_out_i2c_byte(hw, data);
+ if (status != 0)
+ goto fail;
+
+ status = ixgbe_get_i2c_ack(hw);
+ if (status != 0)
+ goto fail;
+
+ ixgbe_i2c_stop(hw);
+ break;
+
+fail:
+ ixgbe_i2c_bus_clear(hw);
+ retry++;
+ if (retry < max_retry)
+ hw_dbg(hw, "I2C byte write error - Retrying.\n");
+ else
+ hw_dbg(hw, "I2C byte write error.\n");
+ } while (retry < max_retry);
+
+ hw->mac.ops.release_swfw_sync(hw, swfw_mask);
+
+write_byte_out:
+ return status;
+}
+
+/**
+ * ixgbe_i2c_start - Sets I2C start condition
+ * @hw: pointer to hardware structure
+ *
+ * Sets I2C start condition (High -> Low on SDA while SCL is High)
+ **/
+static void ixgbe_i2c_start(struct ixgbe_hw *hw)
+{
+ u32 i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL);
+
+ /* Start condition must begin with data and clock high */
+ ixgbe_set_i2c_data(hw, &i2cctl, 1);
+ ixgbe_raise_i2c_clk(hw, &i2cctl);
+
+ /* Setup time for start condition (4.7us) */
+ udelay(IXGBE_I2C_T_SU_STA);
+
+ ixgbe_set_i2c_data(hw, &i2cctl, 0);
+
+ /* Hold time for start condition (4us) */
+ udelay(IXGBE_I2C_T_HD_STA);
+
+ ixgbe_lower_i2c_clk(hw, &i2cctl);
+
+ /* Minimum low period of clock is 4.7 us */
+ udelay(IXGBE_I2C_T_LOW);
+
+}
+
+/**
+ * ixgbe_i2c_stop - Sets I2C stop condition
+ * @hw: pointer to hardware structure
+ *
+ * Sets I2C stop condition (Low -> High on SDA while SCL is High)
+ **/
+static void ixgbe_i2c_stop(struct ixgbe_hw *hw)
+{
+ u32 i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL);
+
+ /* Stop condition must begin with data low and clock high */
+ ixgbe_set_i2c_data(hw, &i2cctl, 0);
+ ixgbe_raise_i2c_clk(hw, &i2cctl);
+
+ /* Setup time for stop condition (4us) */
+ udelay(IXGBE_I2C_T_SU_STO);
+
+ ixgbe_set_i2c_data(hw, &i2cctl, 1);
+
+ /* bus free time between stop and start (4.7us)*/
+ udelay(IXGBE_I2C_T_BUF);
+}
+
+/**
+ * ixgbe_clock_in_i2c_byte - Clocks in one byte via I2C
+ * @hw: pointer to hardware structure
+ * @data: data byte to clock in
+ *
+ * Clocks in one byte data via I2C data/clock
+ **/
+static s32 ixgbe_clock_in_i2c_byte(struct ixgbe_hw *hw, u8 *data)
+{
+ s32 i;
+ bool bit = 0;
+
+ for (i = 7; i >= 0; i--) {
+ ixgbe_clock_in_i2c_bit(hw, &bit);
+ *data |= bit << i;
+ }
+
+ return 0;
+}
+
+/**
+ * ixgbe_clock_out_i2c_byte - Clocks out one byte via I2C
+ * @hw: pointer to hardware structure
+ * @data: data byte clocked out
+ *
+ * Clocks out one byte data via I2C data/clock
+ **/
+static s32 ixgbe_clock_out_i2c_byte(struct ixgbe_hw *hw, u8 data)
+{
+ s32 status = 0;
+ s32 i;
+ u32 i2cctl;
+ bool bit = 0;
+
+ for (i = 7; i >= 0; i--) {
+ bit = (data >> i) & 0x1;
+ status = ixgbe_clock_out_i2c_bit(hw, bit);
+
+ if (status != 0)
+ break;
+ }
+
+ /* Release SDA line (set high) */
+ i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL);
+ i2cctl |= IXGBE_I2C_DATA_OUT;
+ IXGBE_WRITE_REG(hw, IXGBE_I2CCTL, i2cctl);
+ IXGBE_WRITE_FLUSH(hw);
+
+ return status;
+}
+
+/**
+ * ixgbe_get_i2c_ack - Polls for I2C ACK
+ * @hw: pointer to hardware structure
+ *
+ * Clocks in/out one bit via I2C data/clock
+ **/
+static s32 ixgbe_get_i2c_ack(struct ixgbe_hw *hw)
+{
+ s32 status = 0;
+ u32 i = 0;
+ u32 i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL);
+ u32 timeout = 10;
+ bool ack = 1;
+
+ ixgbe_raise_i2c_clk(hw, &i2cctl);
+
+
+ /* Minimum high period of clock is 4us */
+ udelay(IXGBE_I2C_T_HIGH);
+
+ /* Poll for ACK. Note that ACK in I2C spec is
+ * transition from 1 to 0 */
+ for (i = 0; i < timeout; i++) {
+ i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL);
+ ack = ixgbe_get_i2c_data(&i2cctl);
+
+ udelay(1);
+ if (ack == 0)
+ break;
+ }
+
+ if (ack == 1) {
+ hw_dbg(hw, "I2C ack was not received.\n");
+ status = IXGBE_ERR_I2C;
+ }
+
+ ixgbe_lower_i2c_clk(hw, &i2cctl);
+
+ /* Minimum low period of clock is 4.7 us */
+ udelay(IXGBE_I2C_T_LOW);
+
+ return status;
+}
+
+/**
+ * ixgbe_clock_in_i2c_bit - Clocks in one bit via I2C data/clock
+ * @hw: pointer to hardware structure
+ * @data: read data value
+ *
+ * Clocks in one bit via I2C data/clock
+ **/
+static s32 ixgbe_clock_in_i2c_bit(struct ixgbe_hw *hw, bool *data)
+{
+ u32 i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL);
+
+ ixgbe_raise_i2c_clk(hw, &i2cctl);
+
+ /* Minimum high period of clock is 4us */
+ udelay(IXGBE_I2C_T_HIGH);
+
+ i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL);
+ *data = ixgbe_get_i2c_data(&i2cctl);
+
+ ixgbe_lower_i2c_clk(hw, &i2cctl);
+
+ /* Minimum low period of clock is 4.7 us */
+ udelay(IXGBE_I2C_T_LOW);
+
+ return 0;
+}
+
+/**
+ * ixgbe_clock_out_i2c_bit - Clocks in/out one bit via I2C data/clock
+ * @hw: pointer to hardware structure
+ * @data: data value to write
+ *
+ * Clocks out one bit via I2C data/clock
+ **/
+static s32 ixgbe_clock_out_i2c_bit(struct ixgbe_hw *hw, bool data)
+{
+ s32 status;
+ u32 i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL);
+
+ status = ixgbe_set_i2c_data(hw, &i2cctl, data);
+ if (status == 0) {
+ ixgbe_raise_i2c_clk(hw, &i2cctl);
+
+ /* Minimum high period of clock is 4us */
+ udelay(IXGBE_I2C_T_HIGH);
+
+ ixgbe_lower_i2c_clk(hw, &i2cctl);
+
+ /* Minimum low period of clock is 4.7 us.
+ * This also takes care of the data hold time.
+ */
+ udelay(IXGBE_I2C_T_LOW);
+ } else {
+ status = IXGBE_ERR_I2C;
+ hw_dbg(hw, "I2C data was not set to %X\n", data);
+ }
+
+ return status;
+}
+/**
+ * ixgbe_raise_i2c_clk - Raises the I2C SCL clock
+ * @hw: pointer to hardware structure
+ * @i2cctl: Current value of I2CCTL register
+ *
+ * Raises the I2C clock line '0'->'1'
+ **/
+static void ixgbe_raise_i2c_clk(struct ixgbe_hw *hw, u32 *i2cctl)
+{
+ u32 i = 0;
+ u32 timeout = IXGBE_I2C_CLOCK_STRETCHING_TIMEOUT;
+ u32 i2cctl_r = 0;
+
+ for (i = 0; i < timeout; i++) {
+ *i2cctl |= IXGBE_I2C_CLK_OUT;
+
+ IXGBE_WRITE_REG(hw, IXGBE_I2CCTL, *i2cctl);
+ IXGBE_WRITE_FLUSH(hw);
+ /* SCL rise time (1000ns) */
+ udelay(IXGBE_I2C_T_RISE);
+
+ i2cctl_r = IXGBE_READ_REG(hw, IXGBE_I2CCTL);
+ if (i2cctl_r & IXGBE_I2C_CLK_IN)
+ break;
+ }
+}
+
+/**
+ * ixgbe_lower_i2c_clk - Lowers the I2C SCL clock
+ * @hw: pointer to hardware structure
+ * @i2cctl: Current value of I2CCTL register
+ *
+ * Lowers the I2C clock line '1'->'0'
+ **/
+static void ixgbe_lower_i2c_clk(struct ixgbe_hw *hw, u32 *i2cctl)
+{
+
+ *i2cctl &= ~IXGBE_I2C_CLK_OUT;
+
+ IXGBE_WRITE_REG(hw, IXGBE_I2CCTL, *i2cctl);
+ IXGBE_WRITE_FLUSH(hw);
+
+ /* SCL fall time (300ns) */
+ udelay(IXGBE_I2C_T_FALL);
+}
+
+/**
+ * ixgbe_set_i2c_data - Sets the I2C data bit
+ * @hw: pointer to hardware structure
+ * @i2cctl: Current value of I2CCTL register
+ * @data: I2C data value (0 or 1) to set
+ *
+ * Sets the I2C data bit
+ **/
+static s32 ixgbe_set_i2c_data(struct ixgbe_hw *hw, u32 *i2cctl, bool data)
+{
+ s32 status = 0;
+
+ if (data)
+ *i2cctl |= IXGBE_I2C_DATA_OUT;
+ else
+ *i2cctl &= ~IXGBE_I2C_DATA_OUT;
+
+ IXGBE_WRITE_REG(hw, IXGBE_I2CCTL, *i2cctl);
+ IXGBE_WRITE_FLUSH(hw);
+
+ /* Data rise/fall (1000ns/300ns) and set-up time (250ns) */
+ udelay(IXGBE_I2C_T_RISE + IXGBE_I2C_T_FALL + IXGBE_I2C_T_SU_DATA);
+
+ /* Verify data was set correctly */
+ *i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL);
+ if (data != ixgbe_get_i2c_data(i2cctl)) {
+ status = IXGBE_ERR_I2C;
+ hw_dbg(hw, "Error - I2C data was not set to %X.\n", data);
+ }
+
+ return status;
+}
+
+/**
+ * ixgbe_get_i2c_data - Reads the I2C SDA data bit
+ * @hw: pointer to hardware structure
+ * @i2cctl: Current value of I2CCTL register
+ *
+ * Returns the I2C data bit value
+ **/
+static bool ixgbe_get_i2c_data(u32 *i2cctl)
+{
+ bool data;
+
+ if (*i2cctl & IXGBE_I2C_DATA_IN)
+ data = 1;
+ else
+ data = 0;
+
+ return data;
+}
+
+/**
+ * ixgbe_i2c_bus_clear - Clears the I2C bus
+ * @hw: pointer to hardware structure
+ *
+ * Clears the I2C bus by sending nine clock pulses.
+ * Used when data line is stuck low.
+ **/
+void ixgbe_i2c_bus_clear(struct ixgbe_hw *hw)
+{
+ u32 i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL);
+ u32 i;
+
+ ixgbe_i2c_start(hw);
+
+ ixgbe_set_i2c_data(hw, &i2cctl, 1);
+
+ for (i = 0; i < 9; i++) {
+ ixgbe_raise_i2c_clk(hw, &i2cctl);
+
+ /* Min high period of clock is 4us */
+ udelay(IXGBE_I2C_T_HIGH);
+
+ ixgbe_lower_i2c_clk(hw, &i2cctl);
+
+ /* Min low period of clock is 4.7us*/
+ udelay(IXGBE_I2C_T_LOW);
+ }
+
+ ixgbe_i2c_start(hw);
+
+ /* Put the i2c bus back to default state */
+ ixgbe_i2c_stop(hw);
+}
+
+/**
+ * ixgbe_tn_check_overtemp - Checks if an overtemp occurred.
+ * @hw: pointer to hardware structure
+ *
+ * Checks if the LASI temp alarm status was triggered due to overtemp
+ **/
+s32 ixgbe_tn_check_overtemp(struct ixgbe_hw *hw)
+{
+ s32 status = 0;
+ u16 phy_data = 0;
+
+ if (hw->device_id != IXGBE_DEV_ID_82599_T3_LOM)
+ goto out;
+
+ /* Check that the LASI temp alarm status was triggered */
+ hw->phy.ops.read_reg(hw, IXGBE_TN_LASI_STATUS_REG,
+ IXGBE_MDIO_PMA_PMD_DEV_TYPE, &phy_data);
+
+ if (!(phy_data & IXGBE_TN_LASI_STATUS_TEMP_ALARM))
+ goto out;
+
+ status = IXGBE_ERR_OVERTEMP;
+out:
+ return status;
+}
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.h
new file mode 100644
index 00000000..bbe5a9e3
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.h
@@ -0,0 +1,137 @@
+/*******************************************************************************
+
+ Intel 10 Gigabit PCI Express Linux driver
+ Copyright(c) 1999 - 2012 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _IXGBE_PHY_H_
+#define _IXGBE_PHY_H_
+
+#include "ixgbe_type.h"
+#define IXGBE_I2C_EEPROM_DEV_ADDR 0xA0
+
+/* EEPROM byte offsets */
+#define IXGBE_SFF_IDENTIFIER 0x0
+#define IXGBE_SFF_IDENTIFIER_SFP 0x3
+#define IXGBE_SFF_VENDOR_OUI_BYTE0 0x25
+#define IXGBE_SFF_VENDOR_OUI_BYTE1 0x26
+#define IXGBE_SFF_VENDOR_OUI_BYTE2 0x27
+#define IXGBE_SFF_1GBE_COMP_CODES 0x6
+#define IXGBE_SFF_10GBE_COMP_CODES 0x3
+#define IXGBE_SFF_CABLE_TECHNOLOGY 0x8
+#define IXGBE_SFF_CABLE_SPEC_COMP 0x3C
+
+/* Bitmasks */
+#define IXGBE_SFF_DA_PASSIVE_CABLE 0x4
+#define IXGBE_SFF_DA_ACTIVE_CABLE 0x8
+#define IXGBE_SFF_DA_SPEC_ACTIVE_LIMITING 0x4
+#define IXGBE_SFF_1GBASESX_CAPABLE 0x1
+#define IXGBE_SFF_1GBASELX_CAPABLE 0x2
+#define IXGBE_SFF_1GBASET_CAPABLE 0x8
+#define IXGBE_SFF_10GBASESR_CAPABLE 0x10
+#define IXGBE_SFF_10GBASELR_CAPABLE 0x20
+#define IXGBE_I2C_EEPROM_READ_MASK 0x100
+#define IXGBE_I2C_EEPROM_STATUS_MASK 0x3
+#define IXGBE_I2C_EEPROM_STATUS_NO_OPERATION 0x0
+#define IXGBE_I2C_EEPROM_STATUS_PASS 0x1
+#define IXGBE_I2C_EEPROM_STATUS_FAIL 0x2
+#define IXGBE_I2C_EEPROM_STATUS_IN_PROGRESS 0x3
+
+/* Flow control defines */
+#define IXGBE_TAF_SYM_PAUSE 0x400
+#define IXGBE_TAF_ASM_PAUSE 0x800
+
+/* Bit-shift macros */
+#define IXGBE_SFF_VENDOR_OUI_BYTE0_SHIFT 24
+#define IXGBE_SFF_VENDOR_OUI_BYTE1_SHIFT 16
+#define IXGBE_SFF_VENDOR_OUI_BYTE2_SHIFT 8
+
+/* Vendor OUIs: format of OUI is 0x[byte0][byte1][byte2][00] */
+#define IXGBE_SFF_VENDOR_OUI_TYCO 0x00407600
+#define IXGBE_SFF_VENDOR_OUI_FTL 0x00906500
+#define IXGBE_SFF_VENDOR_OUI_AVAGO 0x00176A00
+#define IXGBE_SFF_VENDOR_OUI_INTEL 0x001B2100
+
+/* I2C SDA and SCL timing parameters for standard mode */
+#define IXGBE_I2C_T_HD_STA 4
+#define IXGBE_I2C_T_LOW 5
+#define IXGBE_I2C_T_HIGH 4
+#define IXGBE_I2C_T_SU_STA 5
+#define IXGBE_I2C_T_HD_DATA 5
+#define IXGBE_I2C_T_SU_DATA 1
+#define IXGBE_I2C_T_RISE 1
+#define IXGBE_I2C_T_FALL 1
+#define IXGBE_I2C_T_SU_STO 4
+#define IXGBE_I2C_T_BUF 5
+
+#define IXGBE_TN_LASI_STATUS_REG 0x9005
+#define IXGBE_TN_LASI_STATUS_TEMP_ALARM 0x0008
+
+s32 ixgbe_init_phy_ops_generic(struct ixgbe_hw *hw);
+bool ixgbe_validate_phy_addr(struct ixgbe_hw *hw, u32 phy_addr);
+enum ixgbe_phy_type ixgbe_get_phy_type_from_id(u32 phy_id);
+s32 ixgbe_get_phy_id(struct ixgbe_hw *hw);
+s32 ixgbe_identify_phy_generic(struct ixgbe_hw *hw);
+s32 ixgbe_reset_phy_generic(struct ixgbe_hw *hw);
+s32 ixgbe_read_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr,
+ u32 device_type, u16 *phy_data);
+s32 ixgbe_write_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr,
+ u32 device_type, u16 phy_data);
+s32 ixgbe_setup_phy_link_generic(struct ixgbe_hw *hw);
+s32 ixgbe_setup_phy_link_speed_generic(struct ixgbe_hw *hw,
+ ixgbe_link_speed speed,
+ bool autoneg,
+ bool autoneg_wait_to_complete);
+s32 ixgbe_get_copper_link_capabilities_generic(struct ixgbe_hw *hw,
+ ixgbe_link_speed *speed,
+ bool *autoneg);
+
+/* PHY specific */
+s32 ixgbe_check_phy_link_tnx(struct ixgbe_hw *hw,
+ ixgbe_link_speed *speed,
+ bool *link_up);
+s32 ixgbe_setup_phy_link_tnx(struct ixgbe_hw *hw);
+s32 ixgbe_get_phy_firmware_version_tnx(struct ixgbe_hw *hw,
+ u16 *firmware_version);
+s32 ixgbe_get_phy_firmware_version_generic(struct ixgbe_hw *hw,
+ u16 *firmware_version);
+
+s32 ixgbe_reset_phy_nl(struct ixgbe_hw *hw);
+s32 ixgbe_identify_module_generic(struct ixgbe_hw *hw);
+s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw);
+s32 ixgbe_identify_qsfp_module_generic(struct ixgbe_hw *hw);
+s32 ixgbe_get_sfp_init_sequence_offsets(struct ixgbe_hw *hw,
+ u16 *list_offset,
+ u16 *data_offset);
+s32 ixgbe_tn_check_overtemp(struct ixgbe_hw *hw);
+s32 ixgbe_read_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset,
+ u8 dev_addr, u8 *data);
+s32 ixgbe_write_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset,
+ u8 dev_addr, u8 data);
+s32 ixgbe_read_i2c_eeprom_generic(struct ixgbe_hw *hw, u8 byte_offset,
+ u8 *eeprom_data);
+s32 ixgbe_write_i2c_eeprom_generic(struct ixgbe_hw *hw, u8 byte_offset,
+ u8 eeprom_data);
+void ixgbe_i2c_bus_clear(struct ixgbe_hw *hw);
+#endif /* _IXGBE_PHY_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_sriov.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_sriov.h
new file mode 100644
index 00000000..5e3559fd
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_sriov.h
@@ -0,0 +1,73 @@
+/*******************************************************************************
+
+ Intel 10 Gigabit PCI Express Linux driver
+ Copyright(c) 1999 - 2012 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+
+#ifndef _IXGBE_SRIOV_H_
+#define _IXGBE_SRIOV_H_
+
+int ixgbe_set_vf_multicasts(struct ixgbe_adapter *adapter,
+ int entries, u16 *hash_list, u32 vf);
+void ixgbe_restore_vf_multicasts(struct ixgbe_adapter *adapter);
+int ixgbe_set_vf_vlan(struct ixgbe_adapter *adapter, int add, int vid, u32 vf);
+void ixgbe_set_vmolr(struct ixgbe_hw *hw, u32 vf, bool aupe);
+void ixgbe_vf_reset_event(struct ixgbe_adapter *adapter, u32 vf);
+void ixgbe_vf_reset_msg(struct ixgbe_adapter *adapter, u32 vf);
+void ixgbe_msg_task(struct ixgbe_adapter *adapter);
+int ixgbe_set_vf_mac(struct ixgbe_adapter *adapter,
+ int vf, unsigned char *mac_addr);
+void ixgbe_disable_tx_rx(struct ixgbe_adapter *adapter);
+void ixgbe_ping_all_vfs(struct ixgbe_adapter *adapter);
+#ifdef IFLA_VF_MAX
+int ixgbe_ndo_set_vf_mac(struct net_device *netdev, int queue, u8 *mac);
+int ixgbe_ndo_set_vf_vlan(struct net_device *netdev, int queue, u16 vlan,
+ u8 qos);
+int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
+#ifdef HAVE_VF_SPOOFCHK_CONFIGURE
+int ixgbe_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting);
+#endif
+int ixgbe_ndo_get_vf_config(struct net_device *netdev,
+ int vf, struct ifla_vf_info *ivi);
+#endif
+void ixgbe_disable_sriov(struct ixgbe_adapter *adapter);
+#ifdef CONFIG_PCI_IOV
+int ixgbe_vf_configuration(struct pci_dev *pdev, unsigned int event_mask);
+void ixgbe_enable_sriov(struct ixgbe_adapter *adapter);
+#endif
+int ixgbe_check_vf_assignment(struct ixgbe_adapter *adapter);
+#ifdef IFLA_VF_MAX
+void ixgbe_check_vf_rate_limit(struct ixgbe_adapter *adapter);
+#endif /* IFLA_VF_MAX */
+void ixgbe_dump_registers(struct ixgbe_adapter *adapter);
+
+/*
+ * These are defined in ixgbe_type.h on behalf of the VF driver
+ * but we need them here unwrapped for the PF driver.
+ */
+#define IXGBE_DEV_ID_82599_VF 0x10ED
+#define IXGBE_DEV_ID_X540_VF 0x1515
+
+#endif /* _IXGBE_SRIOV_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_type.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_type.h
new file mode 100644
index 00000000..6b21c879
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_type.h
@@ -0,0 +1,3254 @@
+/*******************************************************************************
+
+ Intel 10 Gigabit PCI Express Linux driver
+ Copyright(c) 1999 - 2012 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _IXGBE_TYPE_H_
+#define _IXGBE_TYPE_H_
+
+#include "ixgbe_osdep.h"
+
+
+/* Vendor ID */
+#define IXGBE_INTEL_VENDOR_ID 0x8086
+
+/* Device IDs */
+#define IXGBE_DEV_ID_82598 0x10B6
+#define IXGBE_DEV_ID_82598_BX 0x1508
+#define IXGBE_DEV_ID_82598AF_DUAL_PORT 0x10C6
+#define IXGBE_DEV_ID_82598AF_SINGLE_PORT 0x10C7
+#define IXGBE_DEV_ID_82598AT 0x10C8
+#define IXGBE_DEV_ID_82598AT2 0x150B
+#define IXGBE_DEV_ID_82598EB_SFP_LOM 0x10DB
+#define IXGBE_DEV_ID_82598EB_CX4 0x10DD
+#define IXGBE_DEV_ID_82598_CX4_DUAL_PORT 0x10EC
+#define IXGBE_DEV_ID_82598_DA_DUAL_PORT 0x10F1
+#define IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM 0x10E1
+#define IXGBE_DEV_ID_82598EB_XF_LR 0x10F4
+#define IXGBE_DEV_ID_82599_KX4 0x10F7
+#define IXGBE_DEV_ID_82599_KX4_MEZZ 0x1514
+#define IXGBE_DEV_ID_82599_KR 0x1517
+#define IXGBE_DEV_ID_82599_COMBO_BACKPLANE 0x10F8
+#define IXGBE_SUBDEV_ID_82599_KX4_KR_MEZZ 0x000C
+#define IXGBE_DEV_ID_82599_CX4 0x10F9
+#define IXGBE_DEV_ID_82599_SFP 0x10FB
+#define IXGBE_SUBDEV_ID_82599_SFP 0x11A9
+#define IXGBE_SUBDEV_ID_82599_560FLR 0x17D0
+#define IXGBE_DEV_ID_82599_BACKPLANE_FCOE 0x152A
+#define IXGBE_DEV_ID_82599_SFP_FCOE 0x1529
+#define IXGBE_DEV_ID_82599_SFP_EM 0x1507
+#define IXGBE_DEV_ID_82599_SFP_SF2 0x154D
+#define IXGBE_DEV_ID_82599_QSFP_SF_QP 0x1558
+#define IXGBE_DEV_ID_82599EN_SFP 0x1557
+#define IXGBE_DEV_ID_82599_XAUI_LOM 0x10FC
+#define IXGBE_DEV_ID_82599_T3_LOM 0x151C
+#define IXGBE_DEV_ID_82599_LS 0x154F
+#define IXGBE_DEV_ID_X540T 0x1528
+
+/* General Registers */
+#define IXGBE_CTRL 0x00000
+#define IXGBE_STATUS 0x00008
+#define IXGBE_CTRL_EXT 0x00018
+#define IXGBE_ESDP 0x00020
+#define IXGBE_EODSDP 0x00028
+#define IXGBE_I2CCTL 0x00028
+#define IXGBE_PHY_GPIO 0x00028
+#define IXGBE_MAC_GPIO 0x00030
+#define IXGBE_PHYINT_STATUS0 0x00100
+#define IXGBE_PHYINT_STATUS1 0x00104
+#define IXGBE_PHYINT_STATUS2 0x00108
+#define IXGBE_LEDCTL 0x00200
+#define IXGBE_FRTIMER 0x00048
+#define IXGBE_TCPTIMER 0x0004C
+#define IXGBE_CORESPARE 0x00600
+#define IXGBE_EXVET 0x05078
+
+/* NVM Registers */
+#define IXGBE_EEC 0x10010
+#define IXGBE_EERD 0x10014
+#define IXGBE_EEWR 0x10018
+#define IXGBE_FLA 0x1001C
+#define IXGBE_EEMNGCTL 0x10110
+#define IXGBE_EEMNGDATA 0x10114
+#define IXGBE_FLMNGCTL 0x10118
+#define IXGBE_FLMNGDATA 0x1011C
+#define IXGBE_FLMNGCNT 0x10120
+#define IXGBE_FLOP 0x1013C
+#define IXGBE_GRC 0x10200
+#define IXGBE_SRAMREL 0x10210
+#define IXGBE_PHYDBG 0x10218
+
+/* General Receive Control */
+#define IXGBE_GRC_MNG 0x00000001 /* Manageability Enable */
+#define IXGBE_GRC_APME 0x00000002 /* APM enabled in EEPROM */
+
+#define IXGBE_VPDDIAG0 0x10204
+#define IXGBE_VPDDIAG1 0x10208
+
+/* I2CCTL Bit Masks */
+#define IXGBE_I2C_CLK_IN 0x00000001
+#define IXGBE_I2C_CLK_OUT 0x00000002
+#define IXGBE_I2C_DATA_IN 0x00000004
+#define IXGBE_I2C_DATA_OUT 0x00000008
+#define IXGBE_I2C_CLOCK_STRETCHING_TIMEOUT 500
+
+#define IXGBE_I2C_THERMAL_SENSOR_ADDR 0xF8
+#define IXGBE_EMC_INTERNAL_DATA 0x00
+#define IXGBE_EMC_INTERNAL_THERM_LIMIT 0x20
+#define IXGBE_EMC_DIODE1_DATA 0x01
+#define IXGBE_EMC_DIODE1_THERM_LIMIT 0x19
+#define IXGBE_EMC_DIODE2_DATA 0x23
+#define IXGBE_EMC_DIODE2_THERM_LIMIT 0x1A
+
+#define IXGBE_MAX_SENSORS 3
+
+struct ixgbe_thermal_diode_data {
+ u8 location;
+ u8 temp;
+ u8 caution_thresh;
+ u8 max_op_thresh;
+};
+
+struct ixgbe_thermal_sensor_data {
+ struct ixgbe_thermal_diode_data sensor[IXGBE_MAX_SENSORS];
+};
+
+/* Interrupt Registers */
+#define IXGBE_EICR 0x00800
+#define IXGBE_EICS 0x00808
+#define IXGBE_EIMS 0x00880
+#define IXGBE_EIMC 0x00888
+#define IXGBE_EIAC 0x00810
+#define IXGBE_EIAM 0x00890
+#define IXGBE_EICS_EX(_i) (0x00A90 + (_i) * 4)
+#define IXGBE_EIMS_EX(_i) (0x00AA0 + (_i) * 4)
+#define IXGBE_EIMC_EX(_i) (0x00AB0 + (_i) * 4)
+#define IXGBE_EIAM_EX(_i) (0x00AD0 + (_i) * 4)
+/* 82599 EITR is only 12 bits, with the lower 3 always zero */
+/*
+ * 82598 EITR is 16 bits but set the limits based on the max
+ * supported by all ixgbe hardware
+ */
+#define IXGBE_MAX_INT_RATE 488281
+#define IXGBE_MIN_INT_RATE 956
+#define IXGBE_MAX_EITR 0x00000FF8
+#define IXGBE_MIN_EITR 8
+#define IXGBE_EITR(_i) (((_i) <= 23) ? (0x00820 + ((_i) * 4)) : \
+ (0x012300 + (((_i) - 24) * 4)))
+#define IXGBE_EITR_ITR_INT_MASK 0x00000FF8
+#define IXGBE_EITR_LLI_MOD 0x00008000
+#define IXGBE_EITR_CNT_WDIS 0x80000000
+#define IXGBE_IVAR(_i) (0x00900 + ((_i) * 4)) /* 24 at 0x900-0x960 */
+#define IXGBE_IVAR_MISC 0x00A00 /* misc MSI-X interrupt causes */
+#define IXGBE_EITRSEL 0x00894
+#define IXGBE_MSIXT 0x00000 /* MSI-X Table. 0x0000 - 0x01C */
+#define IXGBE_MSIXPBA 0x02000 /* MSI-X Pending bit array */
+#define IXGBE_PBACL(_i) (((_i) == 0) ? (0x11068) : (0x110C0 + ((_i) * 4)))
+#define IXGBE_GPIE 0x00898
+
+/* Flow Control Registers */
+#define IXGBE_FCADBUL 0x03210
+#define IXGBE_FCADBUH 0x03214
+#define IXGBE_FCAMACL 0x04328
+#define IXGBE_FCAMACH 0x0432C
+#define IXGBE_FCRTH_82599(_i) (0x03260 + ((_i) * 4)) /* 8 of these (0-7) */
+#define IXGBE_FCRTL_82599(_i) (0x03220 + ((_i) * 4)) /* 8 of these (0-7) */
+#define IXGBE_PFCTOP 0x03008
+#define IXGBE_FCTTV(_i) (0x03200 + ((_i) * 4)) /* 4 of these (0-3) */
+#define IXGBE_FCRTL(_i) (0x03220 + ((_i) * 8)) /* 8 of these (0-7) */
+#define IXGBE_FCRTH(_i) (0x03260 + ((_i) * 8)) /* 8 of these (0-7) */
+#define IXGBE_FCRTV 0x032A0
+#define IXGBE_FCCFG 0x03D00
+#define IXGBE_TFCS 0x0CE00
+
+/* Receive DMA Registers */
+#define IXGBE_RDBAL(_i) (((_i) < 64) ? (0x01000 + ((_i) * 0x40)) : \
+ (0x0D000 + (((_i) - 64) * 0x40)))
+#define IXGBE_RDBAH(_i) (((_i) < 64) ? (0x01004 + ((_i) * 0x40)) : \
+ (0x0D004 + (((_i) - 64) * 0x40)))
+#define IXGBE_RDLEN(_i) (((_i) < 64) ? (0x01008 + ((_i) * 0x40)) : \
+ (0x0D008 + (((_i) - 64) * 0x40)))
+#define IXGBE_RDH(_i) (((_i) < 64) ? (0x01010 + ((_i) * 0x40)) : \
+ (0x0D010 + (((_i) - 64) * 0x40)))
+#define IXGBE_RDT(_i) (((_i) < 64) ? (0x01018 + ((_i) * 0x40)) : \
+ (0x0D018 + (((_i) - 64) * 0x40)))
+#define IXGBE_RXDCTL(_i) (((_i) < 64) ? (0x01028 + ((_i) * 0x40)) : \
+ (0x0D028 + (((_i) - 64) * 0x40)))
+#define IXGBE_RSCCTL(_i) (((_i) < 64) ? (0x0102C + ((_i) * 0x40)) : \
+ (0x0D02C + (((_i) - 64) * 0x40)))
+#define IXGBE_RSCDBU 0x03028
+#define IXGBE_RDDCC 0x02F20
+#define IXGBE_RXMEMWRAP 0x03190
+#define IXGBE_STARCTRL 0x03024
+/*
+ * Split and Replication Receive Control Registers
+ * 00-15 : 0x02100 + n*4
+ * 16-64 : 0x01014 + n*0x40
+ * 64-127: 0x0D014 + (n-64)*0x40
+ */
+#define IXGBE_SRRCTL(_i) (((_i) <= 15) ? (0x02100 + ((_i) * 4)) : \
+ (((_i) < 64) ? (0x01014 + ((_i) * 0x40)) : \
+ (0x0D014 + (((_i) - 64) * 0x40))))
+/*
+ * Rx DCA Control Register:
+ * 00-15 : 0x02200 + n*4
+ * 16-64 : 0x0100C + n*0x40
+ * 64-127: 0x0D00C + (n-64)*0x40
+ */
+#define IXGBE_DCA_RXCTRL(_i) (((_i) <= 15) ? (0x02200 + ((_i) * 4)) : \
+ (((_i) < 64) ? (0x0100C + ((_i) * 0x40)) : \
+ (0x0D00C + (((_i) - 64) * 0x40))))
+#define IXGBE_RDRXCTL 0x02F00
+#define IXGBE_RDRXCTL_RSC_PUSH 0x80
+/* 8 of these 0x03C00 - 0x03C1C */
+#define IXGBE_RXPBSIZE(_i) (0x03C00 + ((_i) * 4))
+#define IXGBE_RXCTRL 0x03000
+#define IXGBE_DROPEN 0x03D04
+#define IXGBE_RXPBSIZE_SHIFT 10
+
+/* Receive Registers */
+#define IXGBE_RXCSUM 0x05000
+#define IXGBE_RFCTL 0x05008
+#define IXGBE_DRECCCTL 0x02F08
+#define IXGBE_DRECCCTL_DISABLE 0
+#define IXGBE_DRECCCTL2 0x02F8C
+
+/* Multicast Table Array - 128 entries */
+#define IXGBE_MTA(_i) (0x05200 + ((_i) * 4))
+#define IXGBE_RAL(_i) (((_i) <= 15) ? (0x05400 + ((_i) * 8)) : \
+ (0x0A200 + ((_i) * 8)))
+#define IXGBE_RAH(_i) (((_i) <= 15) ? (0x05404 + ((_i) * 8)) : \
+ (0x0A204 + ((_i) * 8)))
+#define IXGBE_MPSAR_LO(_i) (0x0A600 + ((_i) * 8))
+#define IXGBE_MPSAR_HI(_i) (0x0A604 + ((_i) * 8))
+/* Packet split receive type */
+#define IXGBE_PSRTYPE(_i) (((_i) <= 15) ? (0x05480 + ((_i) * 4)) : \
+ (0x0EA00 + ((_i) * 4)))
+/* array of 4096 1-bit vlan filters */
+#define IXGBE_VFTA(_i) (0x0A000 + ((_i) * 4))
+/*array of 4096 4-bit vlan vmdq indices */
+#define IXGBE_VFTAVIND(_j, _i) (0x0A200 + ((_j) * 0x200) + ((_i) * 4))
+#define IXGBE_FCTRL 0x05080
+#define IXGBE_VLNCTRL 0x05088
+#define IXGBE_MCSTCTRL 0x05090
+#define IXGBE_MRQC 0x05818
+#define IXGBE_SAQF(_i) (0x0E000 + ((_i) * 4)) /* Source Address Queue Filter */
+#define IXGBE_DAQF(_i) (0x0E200 + ((_i) * 4)) /* Dest. Address Queue Filter */
+#define IXGBE_SDPQF(_i) (0x0E400 + ((_i) * 4)) /* Src Dest. Addr Queue Filter */
+#define IXGBE_FTQF(_i) (0x0E600 + ((_i) * 4)) /* Five Tuple Queue Filter */
+#define IXGBE_ETQF(_i) (0x05128 + ((_i) * 4)) /* EType Queue Filter */
+#define IXGBE_ETQS(_i) (0x0EC00 + ((_i) * 4)) /* EType Queue Select */
+#define IXGBE_SYNQF 0x0EC30 /* SYN Packet Queue Filter */
+#define IXGBE_RQTC 0x0EC70
+#define IXGBE_MTQC 0x08120
+#define IXGBE_VLVF(_i) (0x0F100 + ((_i) * 4)) /* 64 of these (0-63) */
+#define IXGBE_VLVFB(_i) (0x0F200 + ((_i) * 4)) /* 128 of these (0-127) */
+#define IXGBE_VMVIR(_i) (0x08000 + ((_i) * 4)) /* 64 of these (0-63) */
+#define IXGBE_VT_CTL 0x051B0
+#define IXGBE_PFMAILBOX(_i) (0x04B00 + (4 * (_i))) /* 64 total */
+/* 64 Mailboxes, 16 DW each */
+#define IXGBE_PFMBMEM(_i) (0x13000 + (64 * (_i)))
+#define IXGBE_PFMBICR(_i) (0x00710 + (4 * (_i))) /* 4 total */
+#define IXGBE_PFMBIMR(_i) (0x00720 + (4 * (_i))) /* 4 total */
+#define IXGBE_VFRE(_i) (0x051E0 + ((_i) * 4))
+#define IXGBE_VFTE(_i) (0x08110 + ((_i) * 4))
+#define IXGBE_VMECM(_i) (0x08790 + ((_i) * 4))
+#define IXGBE_QDE 0x2F04
+#define IXGBE_VMTXSW(_i) (0x05180 + ((_i) * 4)) /* 2 total */
+#define IXGBE_VMOLR(_i) (0x0F000 + ((_i) * 4)) /* 64 total */
+#define IXGBE_UTA(_i) (0x0F400 + ((_i) * 4))
+#define IXGBE_MRCTL(_i) (0x0F600 + ((_i) * 4))
+#define IXGBE_VMRVLAN(_i) (0x0F610 + ((_i) * 4))
+#define IXGBE_VMRVM(_i) (0x0F630 + ((_i) * 4))
+#define IXGBE_L34T_IMIR(_i) (0x0E800 + ((_i) * 4)) /*128 of these (0-127)*/
+#define IXGBE_RXFECCERR0 0x051B8
+#define IXGBE_LLITHRESH 0x0EC90
+#define IXGBE_IMIR(_i) (0x05A80 + ((_i) * 4)) /* 8 of these (0-7) */
+#define IXGBE_IMIREXT(_i) (0x05AA0 + ((_i) * 4)) /* 8 of these (0-7) */
+#define IXGBE_IMIRVP 0x05AC0
+#define IXGBE_VMD_CTL 0x0581C
+#define IXGBE_RETA(_i) (0x05C00 + ((_i) * 4)) /* 32 of these (0-31) */
+#define IXGBE_RSSRK(_i) (0x05C80 + ((_i) * 4)) /* 10 of these (0-9) */
+
+/* Flow Director registers */
+#define IXGBE_FDIRCTRL 0x0EE00
+#define IXGBE_FDIRHKEY 0x0EE68
+#define IXGBE_FDIRSKEY 0x0EE6C
+#define IXGBE_FDIRDIP4M 0x0EE3C
+#define IXGBE_FDIRSIP4M 0x0EE40
+#define IXGBE_FDIRTCPM 0x0EE44
+#define IXGBE_FDIRUDPM 0x0EE48
+#define IXGBE_FDIRIP6M 0x0EE74
+#define IXGBE_FDIRM 0x0EE70
+
+/* Flow Director Stats registers */
+#define IXGBE_FDIRFREE 0x0EE38
+#define IXGBE_FDIRLEN 0x0EE4C
+#define IXGBE_FDIRUSTAT 0x0EE50
+#define IXGBE_FDIRFSTAT 0x0EE54
+#define IXGBE_FDIRMATCH 0x0EE58
+#define IXGBE_FDIRMISS 0x0EE5C
+
+/* Flow Director Programming registers */
+#define IXGBE_FDIRSIPv6(_i) (0x0EE0C + ((_i) * 4)) /* 3 of these (0-2) */
+#define IXGBE_FDIRIPSA 0x0EE18
+#define IXGBE_FDIRIPDA 0x0EE1C
+#define IXGBE_FDIRPORT 0x0EE20
+#define IXGBE_FDIRVLAN 0x0EE24
+#define IXGBE_FDIRHASH 0x0EE28
+#define IXGBE_FDIRCMD 0x0EE2C
+
+/* Transmit DMA registers */
+#define IXGBE_TDBAL(_i) (0x06000 + ((_i) * 0x40)) /* 32 of them (0-31)*/
+#define IXGBE_TDBAH(_i) (0x06004 + ((_i) * 0x40))
+#define IXGBE_TDLEN(_i) (0x06008 + ((_i) * 0x40))
+#define IXGBE_TDH(_i) (0x06010 + ((_i) * 0x40))
+#define IXGBE_TDT(_i) (0x06018 + ((_i) * 0x40))
+#define IXGBE_TXDCTL(_i) (0x06028 + ((_i) * 0x40))
+#define IXGBE_TDWBAL(_i) (0x06038 + ((_i) * 0x40))
+#define IXGBE_TDWBAH(_i) (0x0603C + ((_i) * 0x40))
+#define IXGBE_DTXCTL 0x07E00
+
+#define IXGBE_DMATXCTL 0x04A80
+#define IXGBE_PFVFSPOOF(_i) (0x08200 + ((_i) * 4)) /* 8 of these 0 - 7 */
+#define IXGBE_PFDTXGSWC 0x08220
+#define IXGBE_DTXMXSZRQ 0x08100
+#define IXGBE_DTXTCPFLGL 0x04A88
+#define IXGBE_DTXTCPFLGH 0x04A8C
+#define IXGBE_LBDRPEN 0x0CA00
+#define IXGBE_TXPBTHRESH(_i) (0x04950 + ((_i) * 4)) /* 8 of these 0 - 7 */
+
+#define IXGBE_DMATXCTL_TE 0x1 /* Transmit Enable */
+#define IXGBE_DMATXCTL_NS 0x2 /* No Snoop LSO hdr buffer */
+#define IXGBE_DMATXCTL_GDV 0x8 /* Global Double VLAN */
+#define IXGBE_DMATXCTL_VT_SHIFT 16 /* VLAN EtherType */
+
+#define IXGBE_PFDTXGSWC_VT_LBEN 0x1 /* Local L2 VT switch enable */
+
+/* Anti-spoofing defines */
+#define IXGBE_SPOOF_MACAS_MASK 0xFF
+#define IXGBE_SPOOF_VLANAS_MASK 0xFF00
+#define IXGBE_SPOOF_VLANAS_SHIFT 8
+#define IXGBE_PFVFSPOOF_REG_COUNT 8
+/* 16 of these (0-15) */
+#define IXGBE_DCA_TXCTRL(_i) (0x07200 + ((_i) * 4))
+/* Tx DCA Control register : 128 of these (0-127) */
+#define IXGBE_DCA_TXCTRL_82599(_i) (0x0600C + ((_i) * 0x40))
+#define IXGBE_TIPG 0x0CB00
+#define IXGBE_TXPBSIZE(_i) (0x0CC00 + ((_i) * 4)) /* 8 of these */
+#define IXGBE_MNGTXMAP 0x0CD10
+#define IXGBE_TIPG_FIBER_DEFAULT 3
+#define IXGBE_TXPBSIZE_SHIFT 10
+
+/* Wake up registers */
+#define IXGBE_WUC 0x05800
+#define IXGBE_WUFC 0x05808
+#define IXGBE_WUS 0x05810
+#define IXGBE_IPAV 0x05838
+#define IXGBE_IP4AT 0x05840 /* IPv4 table 0x5840-0x5858 */
+#define IXGBE_IP6AT 0x05880 /* IPv6 table 0x5880-0x588F */
+
+#define IXGBE_WUPL 0x05900
+#define IXGBE_WUPM 0x05A00 /* wake up pkt memory 0x5A00-0x5A7C */
+#define IXGBE_FHFT(_n) (0x09000 + (_n * 0x100)) /* Flex host filter table */
+/* Ext Flexible Host Filter Table */
+#define IXGBE_FHFT_EXT(_n) (0x09800 + (_n * 0x100))
+
+#define IXGBE_FLEXIBLE_FILTER_COUNT_MAX 4
+#define IXGBE_EXT_FLEXIBLE_FILTER_COUNT_MAX 2
+
+/* Each Flexible Filter is at most 128 (0x80) bytes in length */
+#define IXGBE_FLEXIBLE_FILTER_SIZE_MAX 128
+#define IXGBE_FHFT_LENGTH_OFFSET 0xFC /* Length byte in FHFT */
+#define IXGBE_FHFT_LENGTH_MASK 0x0FF /* Length in lower byte */
+
+/* Definitions for power management and wakeup registers */
+/* Wake Up Control */
+#define IXGBE_WUC_PME_EN 0x00000002 /* PME Enable */
+#define IXGBE_WUC_PME_STATUS 0x00000004 /* PME Status */
+#define IXGBE_WUC_WKEN 0x00000010 /* Enable PE_WAKE_N pin assertion */
+
+/* Wake Up Filter Control */
+#define IXGBE_WUFC_LNKC 0x00000001 /* Link Status Change Wakeup Enable */
+#define IXGBE_WUFC_MAG 0x00000002 /* Magic Packet Wakeup Enable */
+#define IXGBE_WUFC_EX 0x00000004 /* Directed Exact Wakeup Enable */
+#define IXGBE_WUFC_MC 0x00000008 /* Directed Multicast Wakeup Enable */
+#define IXGBE_WUFC_BC 0x00000010 /* Broadcast Wakeup Enable */
+#define IXGBE_WUFC_ARP 0x00000020 /* ARP Request Packet Wakeup Enable */
+#define IXGBE_WUFC_IPV4 0x00000040 /* Directed IPv4 Packet Wakeup Enable */
+#define IXGBE_WUFC_IPV6 0x00000080 /* Directed IPv6 Packet Wakeup Enable */
+#define IXGBE_WUFC_MNG 0x00000100 /* Directed Mgmt Packet Wakeup Enable */
+
+#define IXGBE_WUFC_IGNORE_TCO 0x00008000 /* Ignore WakeOn TCO packets */
+#define IXGBE_WUFC_FLX0 0x00010000 /* Flexible Filter 0 Enable */
+#define IXGBE_WUFC_FLX1 0x00020000 /* Flexible Filter 1 Enable */
+#define IXGBE_WUFC_FLX2 0x00040000 /* Flexible Filter 2 Enable */
+#define IXGBE_WUFC_FLX3 0x00080000 /* Flexible Filter 3 Enable */
+#define IXGBE_WUFC_FLX4 0x00100000 /* Flexible Filter 4 Enable */
+#define IXGBE_WUFC_FLX5 0x00200000 /* Flexible Filter 5 Enable */
+#define IXGBE_WUFC_FLX_FILTERS 0x000F0000 /* Mask for 4 flex filters */
+/* Mask for Ext. flex filters */
+#define IXGBE_WUFC_EXT_FLX_FILTERS 0x00300000
+#define IXGBE_WUFC_ALL_FILTERS 0x003F00FF /* Mask for all wakeup filters */
+#define IXGBE_WUFC_FLX_OFFSET 16 /* Offset to the Flexible Filters bits */
+
+/* Wake Up Status */
+#define IXGBE_WUS_LNKC IXGBE_WUFC_LNKC
+#define IXGBE_WUS_MAG IXGBE_WUFC_MAG
+#define IXGBE_WUS_EX IXGBE_WUFC_EX
+#define IXGBE_WUS_MC IXGBE_WUFC_MC
+#define IXGBE_WUS_BC IXGBE_WUFC_BC
+#define IXGBE_WUS_ARP IXGBE_WUFC_ARP
+#define IXGBE_WUS_IPV4 IXGBE_WUFC_IPV4
+#define IXGBE_WUS_IPV6 IXGBE_WUFC_IPV6
+#define IXGBE_WUS_MNG IXGBE_WUFC_MNG
+#define IXGBE_WUS_FLX0 IXGBE_WUFC_FLX0
+#define IXGBE_WUS_FLX1 IXGBE_WUFC_FLX1
+#define IXGBE_WUS_FLX2 IXGBE_WUFC_FLX2
+#define IXGBE_WUS_FLX3 IXGBE_WUFC_FLX3
+#define IXGBE_WUS_FLX4 IXGBE_WUFC_FLX4
+#define IXGBE_WUS_FLX5 IXGBE_WUFC_FLX5
+#define IXGBE_WUS_FLX_FILTERS IXGBE_WUFC_FLX_FILTERS
+
+/* Wake Up Packet Length */
+#define IXGBE_WUPL_LENGTH_MASK 0xFFFF
+
+/* DCB registers */
+#define IXGBE_DCB_MAX_TRAFFIC_CLASS 8
+#define IXGBE_RMCS 0x03D00
+#define IXGBE_DPMCS 0x07F40
+#define IXGBE_PDPMCS 0x0CD00
+#define IXGBE_RUPPBMR 0x050A0
+#define IXGBE_RT2CR(_i) (0x03C20 + ((_i) * 4)) /* 8 of these (0-7) */
+#define IXGBE_RT2SR(_i) (0x03C40 + ((_i) * 4)) /* 8 of these (0-7) */
+#define IXGBE_TDTQ2TCCR(_i) (0x0602C + ((_i) * 0x40)) /* 8 of these (0-7) */
+#define IXGBE_TDTQ2TCSR(_i) (0x0622C + ((_i) * 0x40)) /* 8 of these (0-7) */
+#define IXGBE_TDPT2TCCR(_i) (0x0CD20 + ((_i) * 4)) /* 8 of these (0-7) */
+#define IXGBE_TDPT2TCSR(_i) (0x0CD40 + ((_i) * 4)) /* 8 of these (0-7) */
+
+
+/* Security Control Registers */
+#define IXGBE_SECTXCTRL 0x08800
+#define IXGBE_SECTXSTAT 0x08804
+#define IXGBE_SECTXBUFFAF 0x08808
+#define IXGBE_SECTXMINIFG 0x08810
+#define IXGBE_SECRXCTRL 0x08D00
+#define IXGBE_SECRXSTAT 0x08D04
+
+/* Security Bit Fields and Masks */
+#define IXGBE_SECTXCTRL_SECTX_DIS 0x00000001
+#define IXGBE_SECTXCTRL_TX_DIS 0x00000002
+#define IXGBE_SECTXCTRL_STORE_FORWARD 0x00000004
+
+#define IXGBE_SECTXSTAT_SECTX_RDY 0x00000001
+#define IXGBE_SECTXSTAT_ECC_TXERR 0x00000002
+
+#define IXGBE_SECRXCTRL_SECRX_DIS 0x00000001
+#define IXGBE_SECRXCTRL_RX_DIS 0x00000002
+
+#define IXGBE_SECRXSTAT_SECRX_RDY 0x00000001
+#define IXGBE_SECRXSTAT_ECC_RXERR 0x00000002
+
+/* LinkSec (MacSec) Registers */
+#define IXGBE_LSECTXCAP 0x08A00
+#define IXGBE_LSECRXCAP 0x08F00
+#define IXGBE_LSECTXCTRL 0x08A04
+#define IXGBE_LSECTXSCL 0x08A08 /* SCI Low */
+#define IXGBE_LSECTXSCH 0x08A0C /* SCI High */
+#define IXGBE_LSECTXSA 0x08A10
+#define IXGBE_LSECTXPN0 0x08A14
+#define IXGBE_LSECTXPN1 0x08A18
+#define IXGBE_LSECTXKEY0(_n) (0x08A1C + (4 * (_n))) /* 4 of these (0-3) */
+#define IXGBE_LSECTXKEY1(_n) (0x08A2C + (4 * (_n))) /* 4 of these (0-3) */
+#define IXGBE_LSECRXCTRL 0x08F04
+#define IXGBE_LSECRXSCL 0x08F08
+#define IXGBE_LSECRXSCH 0x08F0C
+#define IXGBE_LSECRXSA(_i) (0x08F10 + (4 * (_i))) /* 2 of these (0-1) */
+#define IXGBE_LSECRXPN(_i) (0x08F18 + (4 * (_i))) /* 2 of these (0-1) */
+#define IXGBE_LSECRXKEY(_n, _m) (0x08F20 + ((0x10 * (_n)) + (4 * (_m))))
+#define IXGBE_LSECTXUT 0x08A3C /* OutPktsUntagged */
+#define IXGBE_LSECTXPKTE 0x08A40 /* OutPktsEncrypted */
+#define IXGBE_LSECTXPKTP 0x08A44 /* OutPktsProtected */
+#define IXGBE_LSECTXOCTE 0x08A48 /* OutOctetsEncrypted */
+#define IXGBE_LSECTXOCTP 0x08A4C /* OutOctetsProtected */
+#define IXGBE_LSECRXUT 0x08F40 /* InPktsUntagged/InPktsNoTag */
+#define IXGBE_LSECRXOCTD 0x08F44 /* InOctetsDecrypted */
+#define IXGBE_LSECRXOCTV 0x08F48 /* InOctetsValidated */
+#define IXGBE_LSECRXBAD 0x08F4C /* InPktsBadTag */
+#define IXGBE_LSECRXNOSCI 0x08F50 /* InPktsNoSci */
+#define IXGBE_LSECRXUNSCI 0x08F54 /* InPktsUnknownSci */
+#define IXGBE_LSECRXUNCH 0x08F58 /* InPktsUnchecked */
+#define IXGBE_LSECRXDELAY 0x08F5C /* InPktsDelayed */
+#define IXGBE_LSECRXLATE 0x08F60 /* InPktsLate */
+#define IXGBE_LSECRXOK(_n) (0x08F64 + (0x04 * (_n))) /* InPktsOk */
+#define IXGBE_LSECRXINV(_n) (0x08F6C + (0x04 * (_n))) /* InPktsInvalid */
+#define IXGBE_LSECRXNV(_n) (0x08F74 + (0x04 * (_n))) /* InPktsNotValid */
+#define IXGBE_LSECRXUNSA 0x08F7C /* InPktsUnusedSa */
+#define IXGBE_LSECRXNUSA 0x08F80 /* InPktsNotUsingSa */
+
+/* LinkSec (MacSec) Bit Fields and Masks */
+#define IXGBE_LSECTXCAP_SUM_MASK 0x00FF0000
+#define IXGBE_LSECTXCAP_SUM_SHIFT 16
+#define IXGBE_LSECRXCAP_SUM_MASK 0x00FF0000
+#define IXGBE_LSECRXCAP_SUM_SHIFT 16
+
+#define IXGBE_LSECTXCTRL_EN_MASK 0x00000003
+#define IXGBE_LSECTXCTRL_DISABLE 0x0
+#define IXGBE_LSECTXCTRL_AUTH 0x1
+#define IXGBE_LSECTXCTRL_AUTH_ENCRYPT 0x2
+#define IXGBE_LSECTXCTRL_AISCI 0x00000020
+#define IXGBE_LSECTXCTRL_PNTHRSH_MASK 0xFFFFFF00
+#define IXGBE_LSECTXCTRL_RSV_MASK 0x000000D8
+
+#define IXGBE_LSECRXCTRL_EN_MASK 0x0000000C
+#define IXGBE_LSECRXCTRL_EN_SHIFT 2
+#define IXGBE_LSECRXCTRL_DISABLE 0x0
+#define IXGBE_LSECRXCTRL_CHECK 0x1
+#define IXGBE_LSECRXCTRL_STRICT 0x2
+#define IXGBE_LSECRXCTRL_DROP 0x3
+#define IXGBE_LSECRXCTRL_PLSH 0x00000040
+#define IXGBE_LSECRXCTRL_RP 0x00000080
+#define IXGBE_LSECRXCTRL_RSV_MASK 0xFFFFFF33
+
+/* IpSec Registers */
+#define IXGBE_IPSTXIDX 0x08900
+#define IXGBE_IPSTXSALT 0x08904
+#define IXGBE_IPSTXKEY(_i) (0x08908 + (4 * (_i))) /* 4 of these (0-3) */
+#define IXGBE_IPSRXIDX 0x08E00
+#define IXGBE_IPSRXIPADDR(_i) (0x08E04 + (4 * (_i))) /* 4 of these (0-3) */
+#define IXGBE_IPSRXSPI 0x08E14
+#define IXGBE_IPSRXIPIDX 0x08E18
+#define IXGBE_IPSRXKEY(_i) (0x08E1C + (4 * (_i))) /* 4 of these (0-3) */
+#define IXGBE_IPSRXSALT 0x08E2C
+#define IXGBE_IPSRXMOD 0x08E30
+
+#define IXGBE_SECTXCTRL_STORE_FORWARD_ENABLE 0x4
+
+/* DCB registers */
+#define IXGBE_RTRPCS 0x02430
+#define IXGBE_RTTDCS 0x04900
+#define IXGBE_RTTDCS_ARBDIS 0x00000040 /* DCB arbiter disable */
+#define IXGBE_RTTPCS 0x0CD00
+#define IXGBE_RTRUP2TC 0x03020
+#define IXGBE_RTTUP2TC 0x0C800
+#define IXGBE_RTRPT4C(_i) (0x02140 + ((_i) * 4)) /* 8 of these (0-7) */
+#define IXGBE_TXLLQ(_i) (0x082E0 + ((_i) * 4)) /* 4 of these (0-3) */
+#define IXGBE_RTRPT4S(_i) (0x02160 + ((_i) * 4)) /* 8 of these (0-7) */
+#define IXGBE_RTTDT2C(_i) (0x04910 + ((_i) * 4)) /* 8 of these (0-7) */
+#define IXGBE_RTTDT2S(_i) (0x04930 + ((_i) * 4)) /* 8 of these (0-7) */
+#define IXGBE_RTTPT2C(_i) (0x0CD20 + ((_i) * 4)) /* 8 of these (0-7) */
+#define IXGBE_RTTPT2S(_i) (0x0CD40 + ((_i) * 4)) /* 8 of these (0-7) */
+#define IXGBE_RTTDQSEL 0x04904
+#define IXGBE_RTTDT1C 0x04908
+#define IXGBE_RTTDT1S 0x0490C
+#define IXGBE_RTTDTECC 0x04990
+#define IXGBE_RTTDTECC_NO_BCN 0x00000100
+
+#define IXGBE_RTTBCNRC 0x04984
+#define IXGBE_RTTBCNRC_RS_ENA 0x80000000
+#define IXGBE_RTTBCNRC_RF_DEC_MASK 0x00003FFF
+#define IXGBE_RTTBCNRC_RF_INT_SHIFT 14
+#define IXGBE_RTTBCNRC_RF_INT_MASK \
+ (IXGBE_RTTBCNRC_RF_DEC_MASK << IXGBE_RTTBCNRC_RF_INT_SHIFT)
+#define IXGBE_RTTBCNRM 0x04980
+
+/* FCoE DMA Context Registers */
+#define IXGBE_FCPTRL 0x02410 /* FC User Desc. PTR Low */
+#define IXGBE_FCPTRH 0x02414 /* FC USer Desc. PTR High */
+#define IXGBE_FCBUFF 0x02418 /* FC Buffer Control */
+#define IXGBE_FCDMARW 0x02420 /* FC Receive DMA RW */
+#define IXGBE_FCINVST0 0x03FC0 /* FC Invalid DMA Context Status Reg 0*/
+#define IXGBE_FCINVST(_i) (IXGBE_FCINVST0 + ((_i) * 4))
+#define IXGBE_FCBUFF_VALID (1 << 0) /* DMA Context Valid */
+#define IXGBE_FCBUFF_BUFFSIZE (3 << 3) /* User Buffer Size */
+#define IXGBE_FCBUFF_WRCONTX (1 << 7) /* 0: Initiator, 1: Target */
+#define IXGBE_FCBUFF_BUFFCNT 0x0000ff00 /* Number of User Buffers */
+#define IXGBE_FCBUFF_OFFSET 0xffff0000 /* User Buffer Offset */
+#define IXGBE_FCBUFF_BUFFSIZE_SHIFT 3
+#define IXGBE_FCBUFF_BUFFCNT_SHIFT 8
+#define IXGBE_FCBUFF_OFFSET_SHIFT 16
+#define IXGBE_FCDMARW_WE (1 << 14) /* Write enable */
+#define IXGBE_FCDMARW_RE (1 << 15) /* Read enable */
+#define IXGBE_FCDMARW_FCOESEL 0x000001ff /* FC X_ID: 11 bits */
+#define IXGBE_FCDMARW_LASTSIZE 0xffff0000 /* Last User Buffer Size */
+#define IXGBE_FCDMARW_LASTSIZE_SHIFT 16
+/* FCoE SOF/EOF */
+#define IXGBE_TEOFF 0x04A94 /* Tx FC EOF */
+#define IXGBE_TSOFF 0x04A98 /* Tx FC SOF */
+#define IXGBE_REOFF 0x05158 /* Rx FC EOF */
+#define IXGBE_RSOFF 0x051F8 /* Rx FC SOF */
+/* FCoE Filter Context Registers */
+#define IXGBE_FCFLT 0x05108 /* FC FLT Context */
+#define IXGBE_FCFLTRW 0x05110 /* FC Filter RW Control */
+#define IXGBE_FCPARAM 0x051d8 /* FC Offset Parameter */
+#define IXGBE_FCFLT_VALID (1 << 0) /* Filter Context Valid */
+#define IXGBE_FCFLT_FIRST (1 << 1) /* Filter First */
+#define IXGBE_FCFLT_SEQID 0x00ff0000 /* Sequence ID */
+#define IXGBE_FCFLT_SEQCNT 0xff000000 /* Sequence Count */
+#define IXGBE_FCFLTRW_RVALDT (1 << 13) /* Fast Re-Validation */
+#define IXGBE_FCFLTRW_WE (1 << 14) /* Write Enable */
+#define IXGBE_FCFLTRW_RE (1 << 15) /* Read Enable */
+/* FCoE Receive Control */
+#define IXGBE_FCRXCTRL 0x05100 /* FC Receive Control */
+#define IXGBE_FCRXCTRL_FCOELLI (1 << 0) /* Low latency interrupt */
+#define IXGBE_FCRXCTRL_SAVBAD (1 << 1) /* Save Bad Frames */
+#define IXGBE_FCRXCTRL_FRSTRDH (1 << 2) /* EN 1st Read Header */
+#define IXGBE_FCRXCTRL_LASTSEQH (1 << 3) /* EN Last Header in Seq */
+#define IXGBE_FCRXCTRL_ALLH (1 << 4) /* EN All Headers */
+#define IXGBE_FCRXCTRL_FRSTSEQH (1 << 5) /* EN 1st Seq. Header */
+#define IXGBE_FCRXCTRL_ICRC (1 << 6) /* Ignore Bad FC CRC */
+#define IXGBE_FCRXCTRL_FCCRCBO (1 << 7) /* FC CRC Byte Ordering */
+#define IXGBE_FCRXCTRL_FCOEVER 0x00000f00 /* FCoE Version: 4 bits */
+#define IXGBE_FCRXCTRL_FCOEVER_SHIFT 8
+/* FCoE Redirection */
+#define IXGBE_FCRECTL 0x0ED00 /* FC Redirection Control */
+#define IXGBE_FCRETA0 0x0ED10 /* FC Redirection Table 0 */
+#define IXGBE_FCRETA(_i) (IXGBE_FCRETA0 + ((_i) * 4)) /* FCoE Redir */
+#define IXGBE_FCRECTL_ENA 0x1 /* FCoE Redir Table Enable */
+#define IXGBE_FCRETASEL_ENA 0x2 /* FCoE FCRETASEL bit */
+#define IXGBE_FCRETA_SIZE 8 /* Max entries in FCRETA */
+#define IXGBE_FCRETA_ENTRY_MASK 0x0000007f /* 7 bits for the queue index */
+
+/* Stats registers */
+#define IXGBE_CRCERRS 0x04000
+#define IXGBE_ILLERRC 0x04004
+#define IXGBE_ERRBC 0x04008
+#define IXGBE_MSPDC 0x04010
+#define IXGBE_MPC(_i) (0x03FA0 + ((_i) * 4)) /* 8 of these 3FA0-3FBC*/
+#define IXGBE_MLFC 0x04034
+#define IXGBE_MRFC 0x04038
+#define IXGBE_RLEC 0x04040
+#define IXGBE_LXONTXC 0x03F60
+#define IXGBE_LXONRXC 0x0CF60
+#define IXGBE_LXOFFTXC 0x03F68
+#define IXGBE_LXOFFRXC 0x0CF68
+#define IXGBE_LXONRXCNT 0x041A4
+#define IXGBE_LXOFFRXCNT 0x041A8
+#define IXGBE_PXONRXCNT(_i) (0x04140 + ((_i) * 4)) /* 8 of these */
+#define IXGBE_PXOFFRXCNT(_i) (0x04160 + ((_i) * 4)) /* 8 of these */
+#define IXGBE_PXON2OFFCNT(_i) (0x03240 + ((_i) * 4)) /* 8 of these */
+#define IXGBE_PXONTXC(_i) (0x03F00 + ((_i) * 4)) /* 8 of these 3F00-3F1C*/
+#define IXGBE_PXONRXC(_i) (0x0CF00 + ((_i) * 4)) /* 8 of these CF00-CF1C*/
+#define IXGBE_PXOFFTXC(_i) (0x03F20 + ((_i) * 4)) /* 8 of these 3F20-3F3C*/
+#define IXGBE_PXOFFRXC(_i) (0x0CF20 + ((_i) * 4)) /* 8 of these CF20-CF3C*/
+#define IXGBE_PRC64 0x0405C
+#define IXGBE_PRC127 0x04060
+#define IXGBE_PRC255 0x04064
+#define IXGBE_PRC511 0x04068
+#define IXGBE_PRC1023 0x0406C
+#define IXGBE_PRC1522 0x04070
+#define IXGBE_GPRC 0x04074
+#define IXGBE_BPRC 0x04078
+#define IXGBE_MPRC 0x0407C
+#define IXGBE_GPTC 0x04080
+#define IXGBE_GORCL 0x04088
+#define IXGBE_GORCH 0x0408C
+#define IXGBE_GOTCL 0x04090
+#define IXGBE_GOTCH 0x04094
+#define IXGBE_RNBC(_i) (0x03FC0 + ((_i) * 4)) /* 8 of these 3FC0-3FDC*/
+#define IXGBE_RUC 0x040A4
+#define IXGBE_RFC 0x040A8
+#define IXGBE_ROC 0x040AC
+#define IXGBE_RJC 0x040B0
+#define IXGBE_MNGPRC 0x040B4
+#define IXGBE_MNGPDC 0x040B8
+#define IXGBE_MNGPTC 0x0CF90
+#define IXGBE_TORL 0x040C0
+#define IXGBE_TORH 0x040C4
+#define IXGBE_TPR 0x040D0
+#define IXGBE_TPT 0x040D4
+#define IXGBE_PTC64 0x040D8
+#define IXGBE_PTC127 0x040DC
+#define IXGBE_PTC255 0x040E0
+#define IXGBE_PTC511 0x040E4
+#define IXGBE_PTC1023 0x040E8
+#define IXGBE_PTC1522 0x040EC
+#define IXGBE_MPTC 0x040F0
+#define IXGBE_BPTC 0x040F4
+#define IXGBE_XEC 0x04120
+#define IXGBE_SSVPC 0x08780
+
+#define IXGBE_RQSMR(_i) (0x02300 + ((_i) * 4))
+#define IXGBE_TQSMR(_i) (((_i) <= 7) ? (0x07300 + ((_i) * 4)) : \
+ (0x08600 + ((_i) * 4)))
+#define IXGBE_TQSM(_i) (0x08600 + ((_i) * 4))
+
+#define IXGBE_QPRC(_i) (0x01030 + ((_i) * 0x40)) /* 16 of these */
+#define IXGBE_QPTC(_i) (0x06030 + ((_i) * 0x40)) /* 16 of these */
+#define IXGBE_QBRC(_i) (0x01034 + ((_i) * 0x40)) /* 16 of these */
+#define IXGBE_QBTC(_i) (0x06034 + ((_i) * 0x40)) /* 16 of these */
+#define IXGBE_QBRC_L(_i) (0x01034 + ((_i) * 0x40)) /* 16 of these */
+#define IXGBE_QBRC_H(_i) (0x01038 + ((_i) * 0x40)) /* 16 of these */
+#define IXGBE_QPRDC(_i) (0x01430 + ((_i) * 0x40)) /* 16 of these */
+#define IXGBE_QBTC_L(_i) (0x08700 + ((_i) * 0x8)) /* 16 of these */
+#define IXGBE_QBTC_H(_i) (0x08704 + ((_i) * 0x8)) /* 16 of these */
+#define IXGBE_FCCRC 0x05118 /* Num of Good Eth CRC w/ Bad FC CRC */
+#define IXGBE_FCOERPDC 0x0241C /* FCoE Rx Packets Dropped Count */
+#define IXGBE_FCLAST 0x02424 /* FCoE Last Error Count */
+#define IXGBE_FCOEPRC 0x02428 /* Number of FCoE Packets Received */
+#define IXGBE_FCOEDWRC 0x0242C /* Number of FCoE DWords Received */
+#define IXGBE_FCOEPTC 0x08784 /* Number of FCoE Packets Transmitted */
+#define IXGBE_FCOEDWTC 0x08788 /* Number of FCoE DWords Transmitted */
+#define IXGBE_FCCRC_CNT_MASK 0x0000FFFF /* CRC_CNT: bit 0 - 15 */
+#define IXGBE_FCLAST_CNT_MASK 0x0000FFFF /* Last_CNT: bit 0 - 15 */
+#define IXGBE_O2BGPTC 0x041C4
+#define IXGBE_O2BSPC 0x087B0
+#define IXGBE_B2OSPC 0x041C0
+#define IXGBE_B2OGPRC 0x02F90
+#define IXGBE_BUPRC 0x04180
+#define IXGBE_BMPRC 0x04184
+#define IXGBE_BBPRC 0x04188
+#define IXGBE_BUPTC 0x0418C
+#define IXGBE_BMPTC 0x04190
+#define IXGBE_BBPTC 0x04194
+#define IXGBE_BCRCERRS 0x04198
+#define IXGBE_BXONRXC 0x0419C
+#define IXGBE_BXOFFRXC 0x041E0
+#define IXGBE_BXONTXC 0x041E4
+#define IXGBE_BXOFFTXC 0x041E8
+#define IXGBE_PCRC8ECL 0x0E810
+#define IXGBE_PCRC8ECH 0x0E811
+#define IXGBE_PCRC8ECH_MASK 0x1F
+#define IXGBE_LDPCECL 0x0E820
+#define IXGBE_LDPCECH 0x0E821
+
+/* Management */
+#define IXGBE_MAVTV(_i) (0x05010 + ((_i) * 4)) /* 8 of these (0-7) */
+#define IXGBE_MFUTP(_i) (0x05030 + ((_i) * 4)) /* 8 of these (0-7) */
+#define IXGBE_MANC 0x05820
+#define IXGBE_MFVAL 0x05824
+#define IXGBE_MANC2H 0x05860
+#define IXGBE_MDEF(_i) (0x05890 + ((_i) * 4)) /* 8 of these (0-7) */
+#define IXGBE_MIPAF 0x058B0
+#define IXGBE_MMAL(_i) (0x05910 + ((_i) * 8)) /* 4 of these (0-3) */
+#define IXGBE_MMAH(_i) (0x05914 + ((_i) * 8)) /* 4 of these (0-3) */
+#define IXGBE_FTFT 0x09400 /* 0x9400-0x97FC */
+#define IXGBE_METF(_i) (0x05190 + ((_i) * 4)) /* 4 of these (0-3) */
+#define IXGBE_MDEF_EXT(_i) (0x05160 + ((_i) * 4)) /* 8 of these (0-7) */
+#define IXGBE_LSWFW 0x15014
+#define IXGBE_BMCIP(_i) (0x05050 + ((_i) * 4)) /* 0x5050-0x505C */
+#define IXGBE_BMCIPVAL 0x05060
+#define IXGBE_BMCIP_IPADDR_TYPE 0x00000001
+#define IXGBE_BMCIP_IPADDR_VALID 0x00000002
+
+/* Management Bit Fields and Masks */
+#define IXGBE_MANC_EN_BMC2OS 0x10000000 /* Ena BMC2OS and OS2BMC traffic */
+#define IXGBE_MANC_EN_BMC2OS_SHIFT 28
+
+/* Firmware Semaphore Register */
+#define IXGBE_FWSM_MODE_MASK 0xE
+
+/* ARC Subsystem registers */
+#define IXGBE_HICR 0x15F00
+#define IXGBE_FWSTS 0x15F0C
+#define IXGBE_HSMC0R 0x15F04
+#define IXGBE_HSMC1R 0x15F08
+#define IXGBE_SWSR 0x15F10
+#define IXGBE_HFDR 0x15FE8
+#define IXGBE_FLEX_MNG 0x15800 /* 0x15800 - 0x15EFC */
+
+#define IXGBE_HICR_EN 0x01 /* Enable bit - RO */
+/* Driver sets this bit when done to put command in RAM */
+#define IXGBE_HICR_C 0x02
+#define IXGBE_HICR_SV 0x04 /* Status Validity */
+#define IXGBE_HICR_FW_RESET_ENABLE 0x40
+#define IXGBE_HICR_FW_RESET 0x80
+
+/* PCI-E registers */
+#define IXGBE_GCR 0x11000
+#define IXGBE_GTV 0x11004
+#define IXGBE_FUNCTAG 0x11008
+#define IXGBE_GLT 0x1100C
+#define IXGBE_PCIEPIPEADR 0x11004
+#define IXGBE_PCIEPIPEDAT 0x11008
+#define IXGBE_GSCL_1 0x11010
+#define IXGBE_GSCL_2 0x11014
+#define IXGBE_GSCL_3 0x11018
+#define IXGBE_GSCL_4 0x1101C
+#define IXGBE_GSCN_0 0x11020
+#define IXGBE_GSCN_1 0x11024
+#define IXGBE_GSCN_2 0x11028
+#define IXGBE_GSCN_3 0x1102C
+#define IXGBE_FACTPS 0x10150
+#define IXGBE_PCIEANACTL 0x11040
+#define IXGBE_SWSM 0x10140
+#define IXGBE_FWSM 0x10148
+#define IXGBE_GSSR 0x10160
+#define IXGBE_MREVID 0x11064
+#define IXGBE_DCA_ID 0x11070
+#define IXGBE_DCA_CTRL 0x11074
+#define IXGBE_SWFW_SYNC IXGBE_GSSR
+
+/* PCI-E registers 82599-Specific */
+#define IXGBE_GCR_EXT 0x11050
+#define IXGBE_GSCL_5_82599 0x11030
+#define IXGBE_GSCL_6_82599 0x11034
+#define IXGBE_GSCL_7_82599 0x11038
+#define IXGBE_GSCL_8_82599 0x1103C
+#define IXGBE_PHYADR_82599 0x11040
+#define IXGBE_PHYDAT_82599 0x11044
+#define IXGBE_PHYCTL_82599 0x11048
+#define IXGBE_PBACLR_82599 0x11068
+#define IXGBE_CIAA_82599 0x11088
+#define IXGBE_CIAD_82599 0x1108C
+#define IXGBE_PICAUSE 0x110B0
+#define IXGBE_PIENA 0x110B8
+#define IXGBE_CDQ_MBR_82599 0x110B4
+#define IXGBE_PCIESPARE 0x110BC
+#define IXGBE_MISC_REG_82599 0x110F0
+#define IXGBE_ECC_CTRL_0_82599 0x11100
+#define IXGBE_ECC_CTRL_1_82599 0x11104
+#define IXGBE_ECC_STATUS_82599 0x110E0
+#define IXGBE_BAR_CTRL_82599 0x110F4
+
+/* PCI Express Control */
+#define IXGBE_GCR_CMPL_TMOUT_MASK 0x0000F000
+#define IXGBE_GCR_CMPL_TMOUT_10ms 0x00001000
+#define IXGBE_GCR_CMPL_TMOUT_RESEND 0x00010000
+#define IXGBE_GCR_CAP_VER2 0x00040000
+
+#define IXGBE_GCR_EXT_MSIX_EN 0x80000000
+#define IXGBE_GCR_EXT_BUFFERS_CLEAR 0x40000000
+#define IXGBE_GCR_EXT_VT_MODE_16 0x00000001
+#define IXGBE_GCR_EXT_VT_MODE_32 0x00000002
+#define IXGBE_GCR_EXT_VT_MODE_64 0x00000003
+#define IXGBE_GCR_EXT_SRIOV (IXGBE_GCR_EXT_MSIX_EN | \
+ IXGBE_GCR_EXT_VT_MODE_64)
+/* Time Sync Registers */
+#define IXGBE_TSYNCRXCTL 0x05188 /* Rx Time Sync Control register - RW */
+#define IXGBE_TSYNCTXCTL 0x08C00 /* Tx Time Sync Control register - RW */
+#define IXGBE_RXSTMPL 0x051E8 /* Rx timestamp Low - RO */
+#define IXGBE_RXSTMPH 0x051A4 /* Rx timestamp High - RO */
+#define IXGBE_RXSATRL 0x051A0 /* Rx timestamp attribute low - RO */
+#define IXGBE_RXSATRH 0x051A8 /* Rx timestamp attribute high - RO */
+#define IXGBE_RXMTRL 0x05120 /* RX message type register low - RW */
+#define IXGBE_TXSTMPL 0x08C04 /* Tx timestamp value Low - RO */
+#define IXGBE_TXSTMPH 0x08C08 /* Tx timestamp value High - RO */
+#define IXGBE_SYSTIML 0x08C0C /* System time register Low - RO */
+#define IXGBE_SYSTIMH 0x08C10 /* System time register High - RO */
+#define IXGBE_TIMINCA 0x08C14 /* Increment attributes register - RW */
+#define IXGBE_TIMADJL 0x08C18 /* Time Adjustment Offset register Low - RW */
+#define IXGBE_TIMADJH 0x08C1C /* Time Adjustment Offset register High - RW */
+#define IXGBE_TSAUXC 0x08C20 /* TimeSync Auxiliary Control register - RW */
+#define IXGBE_TRGTTIML0 0x08C24 /* Target Time Register 0 Low - RW */
+#define IXGBE_TRGTTIMH0 0x08C28 /* Target Time Register 0 High - RW */
+#define IXGBE_TRGTTIML1 0x08C2C /* Target Time Register 1 Low - RW */
+#define IXGBE_TRGTTIMH1 0x08C30 /* Target Time Register 1 High - RW */
+#define IXGBE_FREQOUT0 0x08C34 /* Frequency Out 0 Control register - RW */
+#define IXGBE_FREQOUT1 0x08C38 /* Frequency Out 1 Control register - RW */
+#define IXGBE_AUXSTMPL0 0x08C3C /* Auxiliary Time Stamp 0 register Low - RO */
+#define IXGBE_AUXSTMPH0 0x08C40 /* Auxiliary Time Stamp 0 register High - RO */
+#define IXGBE_AUXSTMPL1 0x08C44 /* Auxiliary Time Stamp 1 register Low - RO */
+#define IXGBE_AUXSTMPH1 0x08C48 /* Auxiliary Time Stamp 1 register High - RO */
+
+/* Diagnostic Registers */
+#define IXGBE_RDSTATCTL 0x02C20
+#define IXGBE_RDSTAT(_i) (0x02C00 + ((_i) * 4)) /* 0x02C00-0x02C1C */
+#define IXGBE_RDHMPN 0x02F08
+#define IXGBE_RIC_DW(_i) (0x02F10 + ((_i) * 4))
+#define IXGBE_RDPROBE 0x02F20
+#define IXGBE_RDMAM 0x02F30
+#define IXGBE_RDMAD 0x02F34
+#define IXGBE_TDSTATCTL 0x07C20
+#define IXGBE_TDSTAT(_i) (0x07C00 + ((_i) * 4)) /* 0x07C00 - 0x07C1C */
+#define IXGBE_TDHMPN 0x07F08
+#define IXGBE_TDHMPN2 0x082FC
+#define IXGBE_TXDESCIC 0x082CC
+#define IXGBE_TIC_DW(_i) (0x07F10 + ((_i) * 4))
+#define IXGBE_TIC_DW2(_i) (0x082B0 + ((_i) * 4))
+#define IXGBE_TDPROBE 0x07F20
+#define IXGBE_TXBUFCTRL 0x0C600
+#define IXGBE_TXBUFDATA0 0x0C610
+#define IXGBE_TXBUFDATA1 0x0C614
+#define IXGBE_TXBUFDATA2 0x0C618
+#define IXGBE_TXBUFDATA3 0x0C61C
+#define IXGBE_RXBUFCTRL 0x03600
+#define IXGBE_RXBUFDATA0 0x03610
+#define IXGBE_RXBUFDATA1 0x03614
+#define IXGBE_RXBUFDATA2 0x03618
+#define IXGBE_RXBUFDATA3 0x0361C
+#define IXGBE_PCIE_DIAG(_i) (0x11090 + ((_i) * 4)) /* 8 of these */
+#define IXGBE_RFVAL 0x050A4
+#define IXGBE_MDFTC1 0x042B8
+#define IXGBE_MDFTC2 0x042C0
+#define IXGBE_MDFTFIFO1 0x042C4
+#define IXGBE_MDFTFIFO2 0x042C8
+#define IXGBE_MDFTS 0x042CC
+#define IXGBE_RXDATAWRPTR(_i) (0x03700 + ((_i) * 4)) /* 8 of these 3700-370C*/
+#define IXGBE_RXDESCWRPTR(_i) (0x03710 + ((_i) * 4)) /* 8 of these 3710-371C*/
+#define IXGBE_RXDATARDPTR(_i) (0x03720 + ((_i) * 4)) /* 8 of these 3720-372C*/
+#define IXGBE_RXDESCRDPTR(_i) (0x03730 + ((_i) * 4)) /* 8 of these 3730-373C*/
+#define IXGBE_TXDATAWRPTR(_i) (0x0C700 + ((_i) * 4)) /* 8 of these C700-C70C*/
+#define IXGBE_TXDESCWRPTR(_i) (0x0C710 + ((_i) * 4)) /* 8 of these C710-C71C*/
+#define IXGBE_TXDATARDPTR(_i) (0x0C720 + ((_i) * 4)) /* 8 of these C720-C72C*/
+#define IXGBE_TXDESCRDPTR(_i) (0x0C730 + ((_i) * 4)) /* 8 of these C730-C73C*/
+#define IXGBE_PCIEECCCTL 0x1106C
+#define IXGBE_RXWRPTR(_i) (0x03100 + ((_i) * 4)) /* 8 of these 3100-310C*/
+#define IXGBE_RXUSED(_i) (0x03120 + ((_i) * 4)) /* 8 of these 3120-312C*/
+#define IXGBE_RXRDPTR(_i) (0x03140 + ((_i) * 4)) /* 8 of these 3140-314C*/
+#define IXGBE_RXRDWRPTR(_i) (0x03160 + ((_i) * 4)) /* 8 of these 3160-310C*/
+#define IXGBE_TXWRPTR(_i) (0x0C100 + ((_i) * 4)) /* 8 of these C100-C10C*/
+#define IXGBE_TXUSED(_i) (0x0C120 + ((_i) * 4)) /* 8 of these C120-C12C*/
+#define IXGBE_TXRDPTR(_i) (0x0C140 + ((_i) * 4)) /* 8 of these C140-C14C*/
+#define IXGBE_TXRDWRPTR(_i) (0x0C160 + ((_i) * 4)) /* 8 of these C160-C10C*/
+#define IXGBE_PCIEECCCTL0 0x11100
+#define IXGBE_PCIEECCCTL1 0x11104
+#define IXGBE_RXDBUECC 0x03F70
+#define IXGBE_TXDBUECC 0x0CF70
+#define IXGBE_RXDBUEST 0x03F74
+#define IXGBE_TXDBUEST 0x0CF74
+#define IXGBE_PBTXECC 0x0C300
+#define IXGBE_PBRXECC 0x03300
+#define IXGBE_GHECCR 0x110B0
+
+/* MAC Registers */
+#define IXGBE_PCS1GCFIG 0x04200
+#define IXGBE_PCS1GLCTL 0x04208
+#define IXGBE_PCS1GLSTA 0x0420C
+#define IXGBE_PCS1GDBG0 0x04210
+#define IXGBE_PCS1GDBG1 0x04214
+#define IXGBE_PCS1GANA 0x04218
+#define IXGBE_PCS1GANLP 0x0421C
+#define IXGBE_PCS1GANNP 0x04220
+#define IXGBE_PCS1GANLPNP 0x04224
+#define IXGBE_HLREG0 0x04240
+#define IXGBE_HLREG1 0x04244
+#define IXGBE_PAP 0x04248
+#define IXGBE_MACA 0x0424C
+#define IXGBE_APAE 0x04250
+#define IXGBE_ARD 0x04254
+#define IXGBE_AIS 0x04258
+#define IXGBE_MSCA 0x0425C
+#define IXGBE_MSRWD 0x04260
+#define IXGBE_MLADD 0x04264
+#define IXGBE_MHADD 0x04268
+#define IXGBE_MAXFRS 0x04268
+#define IXGBE_TREG 0x0426C
+#define IXGBE_PCSS1 0x04288
+#define IXGBE_PCSS2 0x0428C
+#define IXGBE_XPCSS 0x04290
+#define IXGBE_MFLCN 0x04294
+#define IXGBE_SERDESC 0x04298
+#define IXGBE_MACS 0x0429C
+#define IXGBE_AUTOC 0x042A0
+#define IXGBE_LINKS 0x042A4
+#define IXGBE_LINKS2 0x04324
+#define IXGBE_AUTOC2 0x042A8
+#define IXGBE_AUTOC3 0x042AC
+#define IXGBE_ANLP1 0x042B0
+#define IXGBE_ANLP2 0x042B4
+#define IXGBE_MACC 0x04330
+#define IXGBE_ATLASCTL 0x04800
+#define IXGBE_MMNGC 0x042D0
+#define IXGBE_ANLPNP1 0x042D4
+#define IXGBE_ANLPNP2 0x042D8
+#define IXGBE_KRPCSFC 0x042E0
+#define IXGBE_KRPCSS 0x042E4
+#define IXGBE_FECS1 0x042E8
+#define IXGBE_FECS2 0x042EC
+#define IXGBE_SMADARCTL 0x14F10
+#define IXGBE_MPVC 0x04318
+#define IXGBE_SGMIIC 0x04314
+
+/* Statistics Registers */
+#define IXGBE_RXNFGPC 0x041B0
+#define IXGBE_RXNFGBCL 0x041B4
+#define IXGBE_RXNFGBCH 0x041B8
+#define IXGBE_RXDGPC 0x02F50
+#define IXGBE_RXDGBCL 0x02F54
+#define IXGBE_RXDGBCH 0x02F58
+#define IXGBE_RXDDGPC 0x02F5C
+#define IXGBE_RXDDGBCL 0x02F60
+#define IXGBE_RXDDGBCH 0x02F64
+#define IXGBE_RXLPBKGPC 0x02F68
+#define IXGBE_RXLPBKGBCL 0x02F6C
+#define IXGBE_RXLPBKGBCH 0x02F70
+#define IXGBE_RXDLPBKGPC 0x02F74
+#define IXGBE_RXDLPBKGBCL 0x02F78
+#define IXGBE_RXDLPBKGBCH 0x02F7C
+#define IXGBE_TXDGPC 0x087A0
+#define IXGBE_TXDGBCL 0x087A4
+#define IXGBE_TXDGBCH 0x087A8
+
+#define IXGBE_RXDSTATCTRL 0x02F40
+
+/* Copper Pond 2 link timeout */
+#define IXGBE_VALIDATE_LINK_READY_TIMEOUT 50
+
+/* Omer CORECTL */
+#define IXGBE_CORECTL 0x014F00
+/* BARCTRL */
+#define IXGBE_BARCTRL 0x110F4
+#define IXGBE_BARCTRL_FLSIZE 0x0700
+#define IXGBE_BARCTRL_FLSIZE_SHIFT 8
+#define IXGBE_BARCTRL_CSRSIZE 0x2000
+
+/* RSCCTL Bit Masks */
+#define IXGBE_RSCCTL_RSCEN 0x01
+#define IXGBE_RSCCTL_MAXDESC_1 0x00
+#define IXGBE_RSCCTL_MAXDESC_4 0x04
+#define IXGBE_RSCCTL_MAXDESC_8 0x08
+#define IXGBE_RSCCTL_MAXDESC_16 0x0C
+
+/* RSCDBU Bit Masks */
+#define IXGBE_RSCDBU_RSCSMALDIS_MASK 0x0000007F
+#define IXGBE_RSCDBU_RSCACKDIS 0x00000080
+
+/* RDRXCTL Bit Masks */
+#define IXGBE_RDRXCTL_RDMTS_1_2 0x00000000 /* Rx Desc Min THLD Size */
+#define IXGBE_RDRXCTL_CRCSTRIP 0x00000002 /* CRC Strip */
+#define IXGBE_RDRXCTL_MVMEN 0x00000020
+#define IXGBE_RDRXCTL_DMAIDONE 0x00000008 /* DMA init cycle done */
+#define IXGBE_RDRXCTL_AGGDIS 0x00010000 /* Aggregation disable */
+#define IXGBE_RDRXCTL_RSCFRSTSIZE 0x003E0000 /* RSC First packet size */
+#define IXGBE_RDRXCTL_RSCLLIDIS 0x00800000 /* Disabl RSC compl on LLI */
+#define IXGBE_RDRXCTL_RSCACKC 0x02000000 /* must set 1 when RSC ena */
+#define IXGBE_RDRXCTL_FCOE_WRFIX 0x04000000 /* must set 1 when RSC ena */
+
+/* RQTC Bit Masks and Shifts */
+#define IXGBE_RQTC_SHIFT_TC(_i) ((_i) * 4)
+#define IXGBE_RQTC_TC0_MASK (0x7 << 0)
+#define IXGBE_RQTC_TC1_MASK (0x7 << 4)
+#define IXGBE_RQTC_TC2_MASK (0x7 << 8)
+#define IXGBE_RQTC_TC3_MASK (0x7 << 12)
+#define IXGBE_RQTC_TC4_MASK (0x7 << 16)
+#define IXGBE_RQTC_TC5_MASK (0x7 << 20)
+#define IXGBE_RQTC_TC6_MASK (0x7 << 24)
+#define IXGBE_RQTC_TC7_MASK (0x7 << 28)
+
+/* PSRTYPE.RQPL Bit masks and shift */
+#define IXGBE_PSRTYPE_RQPL_MASK 0x7
+#define IXGBE_PSRTYPE_RQPL_SHIFT 29
+
+/* CTRL Bit Masks */
+#define IXGBE_CTRL_GIO_DIS 0x00000004 /* Global IO Master Disable bit */
+#define IXGBE_CTRL_LNK_RST 0x00000008 /* Link Reset. Resets everything. */
+#define IXGBE_CTRL_RST 0x04000000 /* Reset (SW) */
+#define IXGBE_CTRL_RST_MASK (IXGBE_CTRL_LNK_RST | IXGBE_CTRL_RST)
+
+/* FACTPS */
+#define IXGBE_FACTPS_LFS 0x40000000 /* LAN Function Select */
+
+/* MHADD Bit Masks */
+#define IXGBE_MHADD_MFS_MASK 0xFFFF0000
+#define IXGBE_MHADD_MFS_SHIFT 16
+
+/* Extended Device Control */
+#define IXGBE_CTRL_EXT_PFRSTD 0x00004000 /* Physical Function Reset Done */
+#define IXGBE_CTRL_EXT_NS_DIS 0x00010000 /* No Snoop disable */
+#define IXGBE_CTRL_EXT_RO_DIS 0x00020000 /* Relaxed Ordering disable */
+#define IXGBE_CTRL_EXT_DRV_LOAD 0x10000000 /* Driver loaded bit for FW */
+
+/* Direct Cache Access (DCA) definitions */
+#define IXGBE_DCA_CTRL_DCA_ENABLE 0x00000000 /* DCA Enable */
+#define IXGBE_DCA_CTRL_DCA_DISABLE 0x00000001 /* DCA Disable */
+
+#define IXGBE_DCA_CTRL_DCA_MODE_CB1 0x00 /* DCA Mode CB1 */
+#define IXGBE_DCA_CTRL_DCA_MODE_CB2 0x02 /* DCA Mode CB2 */
+
+#define IXGBE_DCA_RXCTRL_CPUID_MASK 0x0000001F /* Rx CPUID Mask */
+#define IXGBE_DCA_RXCTRL_CPUID_MASK_82599 0xFF000000 /* Rx CPUID Mask */
+#define IXGBE_DCA_RXCTRL_CPUID_SHIFT_82599 24 /* Rx CPUID Shift */
+#define IXGBE_DCA_RXCTRL_DESC_DCA_EN (1 << 5) /* Rx Desc enable */
+#define IXGBE_DCA_RXCTRL_HEAD_DCA_EN (1 << 6) /* Rx Desc header ena */
+#define IXGBE_DCA_RXCTRL_DATA_DCA_EN (1 << 7) /* Rx Desc payload ena */
+#define IXGBE_DCA_RXCTRL_DESC_RRO_EN (1 << 9) /* Rx rd Desc Relax Order */
+#define IXGBE_DCA_RXCTRL_DATA_WRO_EN (1 << 13) /* Rx wr data Relax Order */
+#define IXGBE_DCA_RXCTRL_HEAD_WRO_EN (1 << 15) /* Rx wr header RO */
+
+#define IXGBE_DCA_TXCTRL_CPUID_MASK 0x0000001F /* Tx CPUID Mask */
+#define IXGBE_DCA_TXCTRL_CPUID_MASK_82599 0xFF000000 /* Tx CPUID Mask */
+#define IXGBE_DCA_TXCTRL_CPUID_SHIFT_82599 24 /* Tx CPUID Shift */
+#define IXGBE_DCA_TXCTRL_DESC_DCA_EN (1 << 5) /* DCA Tx Desc enable */
+#define IXGBE_DCA_TXCTRL_DESC_RRO_EN (1 << 9) /* Tx rd Desc Relax Order */
+#define IXGBE_DCA_TXCTRL_DESC_WRO_EN (1 << 11) /* Tx Desc writeback RO bit */
+#define IXGBE_DCA_TXCTRL_DATA_RRO_EN (1 << 13) /* Tx rd data Relax Order */
+#define IXGBE_DCA_MAX_QUEUES_82598 16 /* DCA regs only on 16 queues */
+
+/* MSCA Bit Masks */
+#define IXGBE_MSCA_NP_ADDR_MASK 0x0000FFFF /* MDI Addr (new prot) */
+#define IXGBE_MSCA_NP_ADDR_SHIFT 0
+#define IXGBE_MSCA_DEV_TYPE_MASK 0x001F0000 /* Dev Type (new prot) */
+#define IXGBE_MSCA_DEV_TYPE_SHIFT 16 /* Register Address (old prot */
+#define IXGBE_MSCA_PHY_ADDR_MASK 0x03E00000 /* PHY Address mask */
+#define IXGBE_MSCA_PHY_ADDR_SHIFT 21 /* PHY Address shift*/
+#define IXGBE_MSCA_OP_CODE_MASK 0x0C000000 /* OP CODE mask */
+#define IXGBE_MSCA_OP_CODE_SHIFT 26 /* OP CODE shift */
+#define IXGBE_MSCA_ADDR_CYCLE 0x00000000 /* OP CODE 00 (addr cycle) */
+#define IXGBE_MSCA_WRITE 0x04000000 /* OP CODE 01 (wr) */
+#define IXGBE_MSCA_READ 0x0C000000 /* OP CODE 11 (rd) */
+#define IXGBE_MSCA_READ_AUTOINC 0x08000000 /* OP CODE 10 (rd auto inc)*/
+#define IXGBE_MSCA_ST_CODE_MASK 0x30000000 /* ST Code mask */
+#define IXGBE_MSCA_ST_CODE_SHIFT 28 /* ST Code shift */
+#define IXGBE_MSCA_NEW_PROTOCOL 0x00000000 /* ST CODE 00 (new prot) */
+#define IXGBE_MSCA_OLD_PROTOCOL 0x10000000 /* ST CODE 01 (old prot) */
+#define IXGBE_MSCA_MDI_COMMAND 0x40000000 /* Initiate MDI command */
+#define IXGBE_MSCA_MDI_IN_PROG_EN 0x80000000 /* MDI in progress ena */
+
+/* MSRWD bit masks */
+#define IXGBE_MSRWD_WRITE_DATA_MASK 0x0000FFFF
+#define IXGBE_MSRWD_WRITE_DATA_SHIFT 0
+#define IXGBE_MSRWD_READ_DATA_MASK 0xFFFF0000
+#define IXGBE_MSRWD_READ_DATA_SHIFT 16
+
+/* Atlas registers */
+#define IXGBE_ATLAS_PDN_LPBK 0x24
+#define IXGBE_ATLAS_PDN_10G 0xB
+#define IXGBE_ATLAS_PDN_1G 0xC
+#define IXGBE_ATLAS_PDN_AN 0xD
+
+/* Atlas bit masks */
+#define IXGBE_ATLASCTL_WRITE_CMD 0x00010000
+#define IXGBE_ATLAS_PDN_TX_REG_EN 0x10
+#define IXGBE_ATLAS_PDN_TX_10G_QL_ALL 0xF0
+#define IXGBE_ATLAS_PDN_TX_1G_QL_ALL 0xF0
+#define IXGBE_ATLAS_PDN_TX_AN_QL_ALL 0xF0
+
+/* Omer bit masks */
+#define IXGBE_CORECTL_WRITE_CMD 0x00010000
+
+/* Device Type definitions for new protocol MDIO commands */
+#define IXGBE_MDIO_PMA_PMD_DEV_TYPE 0x1
+#define IXGBE_MDIO_PCS_DEV_TYPE 0x3
+#define IXGBE_MDIO_PHY_XS_DEV_TYPE 0x4
+#define IXGBE_MDIO_AUTO_NEG_DEV_TYPE 0x7
+#define IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE 0x1E /* Device 30 */
+#define IXGBE_TWINAX_DEV 1
+
+#define IXGBE_MDIO_COMMAND_TIMEOUT 100 /* PHY Timeout for 1 GB mode */
+
+#define IXGBE_MDIO_VENDOR_SPECIFIC_1_CONTROL 0x0 /* VS1 Ctrl Reg */
+#define IXGBE_MDIO_VENDOR_SPECIFIC_1_STATUS 0x1 /* VS1 Status Reg */
+#define IXGBE_MDIO_VENDOR_SPECIFIC_1_LINK_STATUS 0x0008 /* 1 = Link Up */
+#define IXGBE_MDIO_VENDOR_SPECIFIC_1_SPEED_STATUS 0x0010 /* 0-10G, 1-1G */
+#define IXGBE_MDIO_VENDOR_SPECIFIC_1_10G_SPEED 0x0018
+#define IXGBE_MDIO_VENDOR_SPECIFIC_1_1G_SPEED 0x0010
+
+#define IXGBE_MDIO_AUTO_NEG_CONTROL 0x0 /* AUTO_NEG Control Reg */
+#define IXGBE_MDIO_AUTO_NEG_STATUS 0x1 /* AUTO_NEG Status Reg */
+#define IXGBE_MDIO_AUTO_NEG_ADVT 0x10 /* AUTO_NEG Advt Reg */
+#define IXGBE_MDIO_AUTO_NEG_LP 0x13 /* AUTO_NEG LP Status Reg */
+#define IXGBE_MDIO_PHY_XS_CONTROL 0x0 /* PHY_XS Control Reg */
+#define IXGBE_MDIO_PHY_XS_RESET 0x8000 /* PHY_XS Reset */
+#define IXGBE_MDIO_PHY_ID_HIGH 0x2 /* PHY ID High Reg*/
+#define IXGBE_MDIO_PHY_ID_LOW 0x3 /* PHY ID Low Reg*/
+#define IXGBE_MDIO_PHY_SPEED_ABILITY 0x4 /* Speed Ability Reg */
+#define IXGBE_MDIO_PHY_SPEED_10G 0x0001 /* 10G capable */
+#define IXGBE_MDIO_PHY_SPEED_1G 0x0010 /* 1G capable */
+#define IXGBE_MDIO_PHY_SPEED_100M 0x0020 /* 100M capable */
+#define IXGBE_MDIO_PHY_EXT_ABILITY 0xB /* Ext Ability Reg */
+#define IXGBE_MDIO_PHY_10GBASET_ABILITY 0x0004 /* 10GBaseT capable */
+#define IXGBE_MDIO_PHY_1000BASET_ABILITY 0x0020 /* 1000BaseT capable */
+#define IXGBE_MDIO_PHY_100BASETX_ABILITY 0x0080 /* 100BaseTX capable */
+#define IXGBE_MDIO_PHY_SET_LOW_POWER_MODE 0x0800 /* Set low power mode */
+
+#define IXGBE_MDIO_PMA_PMD_CONTROL_ADDR 0x0000 /* PMA/PMD Control Reg */
+#define IXGBE_MDIO_PMA_PMD_SDA_SCL_ADDR 0xC30A /* PHY_XS SDA/SCL Addr Reg */
+#define IXGBE_MDIO_PMA_PMD_SDA_SCL_DATA 0xC30B /* PHY_XS SDA/SCL Data Reg */
+#define IXGBE_MDIO_PMA_PMD_SDA_SCL_STAT 0xC30C /* PHY_XS SDA/SCL Status Reg */
+
+/* MII clause 22/28 definitions */
+#define IXGBE_MDIO_PHY_LOW_POWER_MODE 0x0800
+
+#define IXGBE_MII_10GBASE_T_AUTONEG_CTRL_REG 0x20 /* 10G Control Reg */
+#define IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG 0xC400 /* 1G Provisioning 1 */
+#define IXGBE_MII_AUTONEG_XNP_TX_REG 0x17 /* 1G XNP Transmit */
+#define IXGBE_MII_AUTONEG_ADVERTISE_REG 0x10 /* 100M Advertisement */
+#define IXGBE_MII_10GBASE_T_ADVERTISE 0x1000 /* full duplex, bit:12*/
+#define IXGBE_MII_1GBASE_T_ADVERTISE_XNP_TX 0x4000 /* full duplex, bit:14*/
+#define IXGBE_MII_1GBASE_T_ADVERTISE 0x8000 /* full duplex, bit:15*/
+#define IXGBE_MII_100BASE_T_ADVERTISE 0x0100 /* full duplex, bit:8 */
+#define IXGBE_MII_100BASE_T_ADVERTISE_HALF 0x0080 /* half duplex, bit:7 */
+#define IXGBE_MII_RESTART 0x200
+#define IXGBE_MII_AUTONEG_COMPLETE 0x20
+#define IXGBE_MII_AUTONEG_LINK_UP 0x04
+#define IXGBE_MII_AUTONEG_REG 0x0
+
+#define IXGBE_PHY_REVISION_MASK 0xFFFFFFF0
+#define IXGBE_MAX_PHY_ADDR 32
+
+/* PHY IDs*/
+#define TN1010_PHY_ID 0x00A19410
+#define TNX_FW_REV 0xB
+#define X540_PHY_ID 0x01540200
+#define AQ_FW_REV 0x20
+#define QT2022_PHY_ID 0x0043A400
+#define ATH_PHY_ID 0x03429050
+
+/* PHY Types */
+#define IXGBE_M88E1145_E_PHY_ID 0x01410CD0
+
+/* Special PHY Init Routine */
+#define IXGBE_PHY_INIT_OFFSET_NL 0x002B
+#define IXGBE_PHY_INIT_END_NL 0xFFFF
+#define IXGBE_CONTROL_MASK_NL 0xF000
+#define IXGBE_DATA_MASK_NL 0x0FFF
+#define IXGBE_CONTROL_SHIFT_NL 12
+#define IXGBE_DELAY_NL 0
+#define IXGBE_DATA_NL 1
+#define IXGBE_CONTROL_NL 0x000F
+#define IXGBE_CONTROL_EOL_NL 0x0FFF
+#define IXGBE_CONTROL_SOL_NL 0x0000
+
+/* General purpose Interrupt Enable */
+#define IXGBE_SDP0_GPIEN 0x00000001 /* SDP0 */
+#define IXGBE_SDP1_GPIEN 0x00000002 /* SDP1 */
+#define IXGBE_SDP2_GPIEN 0x00000004 /* SDP2 */
+#define IXGBE_GPIE_MSIX_MODE 0x00000010 /* MSI-X mode */
+#define IXGBE_GPIE_OCD 0x00000020 /* Other Clear Disable */
+#define IXGBE_GPIE_EIMEN 0x00000040 /* Immediate Interrupt Enable */
+#define IXGBE_GPIE_EIAME 0x40000000
+#define IXGBE_GPIE_PBA_SUPPORT 0x80000000
+#define IXGBE_GPIE_RSC_DELAY_SHIFT 11
+#define IXGBE_GPIE_VTMODE_MASK 0x0000C000 /* VT Mode Mask */
+#define IXGBE_GPIE_VTMODE_16 0x00004000 /* 16 VFs 8 queues per VF */
+#define IXGBE_GPIE_VTMODE_32 0x00008000 /* 32 VFs 4 queues per VF */
+#define IXGBE_GPIE_VTMODE_64 0x0000C000 /* 64 VFs 2 queues per VF */
+
+/* Packet Buffer Initialization */
+#define IXGBE_MAX_PACKET_BUFFERS 8
+
+#define IXGBE_TXPBSIZE_20KB 0x00005000 /* 20KB Packet Buffer */
+#define IXGBE_TXPBSIZE_40KB 0x0000A000 /* 40KB Packet Buffer */
+#define IXGBE_RXPBSIZE_48KB 0x0000C000 /* 48KB Packet Buffer */
+#define IXGBE_RXPBSIZE_64KB 0x00010000 /* 64KB Packet Buffer */
+#define IXGBE_RXPBSIZE_80KB 0x00014000 /* 80KB Packet Buffer */
+#define IXGBE_RXPBSIZE_128KB 0x00020000 /* 128KB Packet Buffer */
+#define IXGBE_RXPBSIZE_MAX 0x00080000 /* 512KB Packet Buffer */
+#define IXGBE_TXPBSIZE_MAX 0x00028000 /* 160KB Packet Buffer */
+
+#define IXGBE_TXPKT_SIZE_MAX 0xA /* Max Tx Packet size */
+#define IXGBE_MAX_PB 8
+
+/* Packet buffer allocation strategies */
+enum {
+ PBA_STRATEGY_EQUAL = 0, /* Distribute PB space equally */
+#define PBA_STRATEGY_EQUAL PBA_STRATEGY_EQUAL
+ PBA_STRATEGY_WEIGHTED = 1, /* Weight front half of TCs */
+#define PBA_STRATEGY_WEIGHTED PBA_STRATEGY_WEIGHTED
+};
+
+/* Transmit Flow Control status */
+#define IXGBE_TFCS_TXOFF 0x00000001
+#define IXGBE_TFCS_TXOFF0 0x00000100
+#define IXGBE_TFCS_TXOFF1 0x00000200
+#define IXGBE_TFCS_TXOFF2 0x00000400
+#define IXGBE_TFCS_TXOFF3 0x00000800
+#define IXGBE_TFCS_TXOFF4 0x00001000
+#define IXGBE_TFCS_TXOFF5 0x00002000
+#define IXGBE_TFCS_TXOFF6 0x00004000
+#define IXGBE_TFCS_TXOFF7 0x00008000
+
+/* TCP Timer */
+#define IXGBE_TCPTIMER_KS 0x00000100
+#define IXGBE_TCPTIMER_COUNT_ENABLE 0x00000200
+#define IXGBE_TCPTIMER_COUNT_FINISH 0x00000400
+#define IXGBE_TCPTIMER_LOOP 0x00000800
+#define IXGBE_TCPTIMER_DURATION_MASK 0x000000FF
+
+/* HLREG0 Bit Masks */
+#define IXGBE_HLREG0_TXCRCEN 0x00000001 /* bit 0 */
+#define IXGBE_HLREG0_RXCRCSTRP 0x00000002 /* bit 1 */
+#define IXGBE_HLREG0_JUMBOEN 0x00000004 /* bit 2 */
+#define IXGBE_HLREG0_TXPADEN 0x00000400 /* bit 10 */
+#define IXGBE_HLREG0_TXPAUSEEN 0x00001000 /* bit 12 */
+#define IXGBE_HLREG0_RXPAUSEEN 0x00004000 /* bit 14 */
+#define IXGBE_HLREG0_LPBK 0x00008000 /* bit 15 */
+#define IXGBE_HLREG0_MDCSPD 0x00010000 /* bit 16 */
+#define IXGBE_HLREG0_CONTMDC 0x00020000 /* bit 17 */
+#define IXGBE_HLREG0_CTRLFLTR 0x00040000 /* bit 18 */
+#define IXGBE_HLREG0_PREPEND 0x00F00000 /* bits 20-23 */
+#define IXGBE_HLREG0_PRIPAUSEEN 0x01000000 /* bit 24 */
+#define IXGBE_HLREG0_RXPAUSERECDA 0x06000000 /* bits 25-26 */
+#define IXGBE_HLREG0_RXLNGTHERREN 0x08000000 /* bit 27 */
+#define IXGBE_HLREG0_RXPADSTRIPEN 0x10000000 /* bit 28 */
+
+/* VMD_CTL bitmasks */
+#define IXGBE_VMD_CTL_VMDQ_EN 0x00000001
+#define IXGBE_VMD_CTL_VMDQ_FILTER 0x00000002
+
+/* VT_CTL bitmasks */
+#define IXGBE_VT_CTL_DIS_DEFPL 0x20000000 /* disable default pool */
+#define IXGBE_VT_CTL_REPLEN 0x40000000 /* replication enabled */
+#define IXGBE_VT_CTL_VT_ENABLE 0x00000001 /* Enable VT Mode */
+#define IXGBE_VT_CTL_POOL_SHIFT 7
+#define IXGBE_VT_CTL_POOL_MASK (0x3F << IXGBE_VT_CTL_POOL_SHIFT)
+
+/* VMOLR bitmasks */
+#define IXGBE_VMOLR_AUPE 0x01000000 /* accept untagged packets */
+#define IXGBE_VMOLR_ROMPE 0x02000000 /* accept packets in MTA tbl */
+#define IXGBE_VMOLR_ROPE 0x04000000 /* accept packets in UC tbl */
+#define IXGBE_VMOLR_BAM 0x08000000 /* accept broadcast packets */
+#define IXGBE_VMOLR_MPE 0x10000000 /* multicast promiscuous */
+
+/* VFRE bitmask */
+#define IXGBE_VFRE_ENABLE_ALL 0xFFFFFFFF
+
+#define IXGBE_VF_INIT_TIMEOUT 200 /* Number of retries to clear RSTI */
+
+/* RDHMPN and TDHMPN bitmasks */
+#define IXGBE_RDHMPN_RDICADDR 0x007FF800
+#define IXGBE_RDHMPN_RDICRDREQ 0x00800000
+#define IXGBE_RDHMPN_RDICADDR_SHIFT 11
+#define IXGBE_TDHMPN_TDICADDR 0x003FF800
+#define IXGBE_TDHMPN_TDICRDREQ 0x00800000
+#define IXGBE_TDHMPN_TDICADDR_SHIFT 11
+
+#define IXGBE_RDMAM_MEM_SEL_SHIFT 13
+#define IXGBE_RDMAM_DWORD_SHIFT 9
+#define IXGBE_RDMAM_DESC_COMP_FIFO 1
+#define IXGBE_RDMAM_DFC_CMD_FIFO 2
+#define IXGBE_RDMAM_RSC_HEADER_ADDR 3
+#define IXGBE_RDMAM_TCN_STATUS_RAM 4
+#define IXGBE_RDMAM_WB_COLL_FIFO 5
+#define IXGBE_RDMAM_QSC_CNT_RAM 6
+#define IXGBE_RDMAM_QSC_FCOE_RAM 7
+#define IXGBE_RDMAM_QSC_QUEUE_CNT 8
+#define IXGBE_RDMAM_QSC_QUEUE_RAM 0xA
+#define IXGBE_RDMAM_QSC_RSC_RAM 0xB
+#define IXGBE_RDMAM_DESC_COM_FIFO_RANGE 135
+#define IXGBE_RDMAM_DESC_COM_FIFO_COUNT 4
+#define IXGBE_RDMAM_DFC_CMD_FIFO_RANGE 48
+#define IXGBE_RDMAM_DFC_CMD_FIFO_COUNT 7
+#define IXGBE_RDMAM_RSC_HEADER_ADDR_RANGE 32
+#define IXGBE_RDMAM_RSC_HEADER_ADDR_COUNT 4
+#define IXGBE_RDMAM_TCN_STATUS_RAM_RANGE 256
+#define IXGBE_RDMAM_TCN_STATUS_RAM_COUNT 9
+#define IXGBE_RDMAM_WB_COLL_FIFO_RANGE 8
+#define IXGBE_RDMAM_WB_COLL_FIFO_COUNT 4
+#define IXGBE_RDMAM_QSC_CNT_RAM_RANGE 64
+#define IXGBE_RDMAM_QSC_CNT_RAM_COUNT 4
+#define IXGBE_RDMAM_QSC_FCOE_RAM_RANGE 512
+#define IXGBE_RDMAM_QSC_FCOE_RAM_COUNT 5
+#define IXGBE_RDMAM_QSC_QUEUE_CNT_RANGE 32
+#define IXGBE_RDMAM_QSC_QUEUE_CNT_COUNT 4
+#define IXGBE_RDMAM_QSC_QUEUE_RAM_RANGE 128
+#define IXGBE_RDMAM_QSC_QUEUE_RAM_COUNT 8
+#define IXGBE_RDMAM_QSC_RSC_RAM_RANGE 32
+#define IXGBE_RDMAM_QSC_RSC_RAM_COUNT 8
+
+#define IXGBE_TXDESCIC_READY 0x80000000
+
+/* Receive Checksum Control */
+#define IXGBE_RXCSUM_IPPCSE 0x00001000 /* IP payload checksum enable */
+#define IXGBE_RXCSUM_PCSD 0x00002000 /* packet checksum disabled */
+
+/* FCRTL Bit Masks */
+#define IXGBE_FCRTL_XONE 0x80000000 /* XON enable */
+#define IXGBE_FCRTH_FCEN 0x80000000 /* Packet buffer fc enable */
+
+/* PAP bit masks*/
+#define IXGBE_PAP_TXPAUSECNT_MASK 0x0000FFFF /* Pause counter mask */
+
+/* RMCS Bit Masks */
+#define IXGBE_RMCS_RRM 0x00000002 /* Rx Recycle Mode enable */
+/* Receive Arbitration Control: 0 Round Robin, 1 DFP */
+#define IXGBE_RMCS_RAC 0x00000004
+/* Deficit Fixed Prio ena */
+#define IXGBE_RMCS_DFP IXGBE_RMCS_RAC
+#define IXGBE_RMCS_TFCE_802_3X 0x00000008 /* Tx Priority FC ena */
+#define IXGBE_RMCS_TFCE_PRIORITY 0x00000010 /* Tx Priority FC ena */
+#define IXGBE_RMCS_ARBDIS 0x00000040 /* Arbitration disable bit */
+
+/* FCCFG Bit Masks */
+#define IXGBE_FCCFG_TFCE_802_3X 0x00000008 /* Tx link FC enable */
+#define IXGBE_FCCFG_TFCE_PRIORITY 0x00000010 /* Tx priority FC enable */
+
+/* Interrupt register bitmasks */
+
+/* Extended Interrupt Cause Read */
+#define IXGBE_EICR_RTX_QUEUE 0x0000FFFF /* RTx Queue Interrupt */
+#define IXGBE_EICR_FLOW_DIR 0x00010000 /* FDir Exception */
+#define IXGBE_EICR_RX_MISS 0x00020000 /* Packet Buffer Overrun */
+#define IXGBE_EICR_PCI 0x00040000 /* PCI Exception */
+#define IXGBE_EICR_MAILBOX 0x00080000 /* VF to PF Mailbox Interrupt */
+#define IXGBE_EICR_LSC 0x00100000 /* Link Status Change */
+#define IXGBE_EICR_LINKSEC 0x00200000 /* PN Threshold */
+#define IXGBE_EICR_MNG 0x00400000 /* Manageability Event Interrupt */
+#define IXGBE_EICR_TS 0x00800000 /* Thermal Sensor Event */
+#define IXGBE_EICR_GPI_SDP0 0x01000000 /* Gen Purpose Interrupt on SDP0 */
+#define IXGBE_EICR_GPI_SDP1 0x02000000 /* Gen Purpose Interrupt on SDP1 */
+#define IXGBE_EICR_GPI_SDP2 0x04000000 /* Gen Purpose Interrupt on SDP2 */
+#define IXGBE_EICR_ECC 0x10000000 /* ECC Error */
+#define IXGBE_EICR_PBUR 0x10000000 /* Packet Buffer Handler Error */
+#define IXGBE_EICR_DHER 0x20000000 /* Descriptor Handler Error */
+#define IXGBE_EICR_TCP_TIMER 0x40000000 /* TCP Timer */
+#define IXGBE_EICR_OTHER 0x80000000 /* Interrupt Cause Active */
+
+/* Extended Interrupt Cause Set */
+#define IXGBE_EICS_RTX_QUEUE IXGBE_EICR_RTX_QUEUE /* RTx Queue Interrupt */
+#define IXGBE_EICS_FLOW_DIR IXGBE_EICR_FLOW_DIR /* FDir Exception */
+#define IXGBE_EICS_RX_MISS IXGBE_EICR_RX_MISS /* Pkt Buffer Overrun */
+#define IXGBE_EICS_PCI IXGBE_EICR_PCI /* PCI Exception */
+#define IXGBE_EICS_MAILBOX IXGBE_EICR_MAILBOX /* VF to PF Mailbox Int */
+#define IXGBE_EICS_LSC IXGBE_EICR_LSC /* Link Status Change */
+#define IXGBE_EICS_MNG IXGBE_EICR_MNG /* MNG Event Interrupt */
+#define IXGBE_EICS_GPI_SDP0 IXGBE_EICR_GPI_SDP0 /* SDP0 Gen Purpose Int */
+#define IXGBE_EICS_GPI_SDP1 IXGBE_EICR_GPI_SDP1 /* SDP1 Gen Purpose Int */
+#define IXGBE_EICS_GPI_SDP2 IXGBE_EICR_GPI_SDP2 /* SDP2 Gen Purpose Int */
+#define IXGBE_EICS_ECC IXGBE_EICR_ECC /* ECC Error */
+#define IXGBE_EICS_PBUR IXGBE_EICR_PBUR /* Pkt Buf Handler Err */
+#define IXGBE_EICS_DHER IXGBE_EICR_DHER /* Desc Handler Error */
+#define IXGBE_EICS_TCP_TIMER IXGBE_EICR_TCP_TIMER /* TCP Timer */
+#define IXGBE_EICS_OTHER IXGBE_EICR_OTHER /* INT Cause Active */
+
+/* Extended Interrupt Mask Set */
+#define IXGBE_EIMS_RTX_QUEUE IXGBE_EICR_RTX_QUEUE /* RTx Queue Interrupt */
+#define IXGBE_EIMS_FLOW_DIR IXGBE_EICR_FLOW_DIR /* FDir Exception */
+#define IXGBE_EIMS_RX_MISS IXGBE_EICR_RX_MISS /* Packet Buffer Overrun */
+#define IXGBE_EIMS_PCI IXGBE_EICR_PCI /* PCI Exception */
+#define IXGBE_EIMS_MAILBOX IXGBE_EICR_MAILBOX /* VF to PF Mailbox Int */
+#define IXGBE_EIMS_LSC IXGBE_EICR_LSC /* Link Status Change */
+#define IXGBE_EIMS_MNG IXGBE_EICR_MNG /* MNG Event Interrupt */
+#define IXGBE_EIMS_TS IXGBE_EICR_TS /* Thermal Sensor Event */
+#define IXGBE_EIMS_GPI_SDP0 IXGBE_EICR_GPI_SDP0 /* SDP0 Gen Purpose Int */
+#define IXGBE_EIMS_GPI_SDP1 IXGBE_EICR_GPI_SDP1 /* SDP1 Gen Purpose Int */
+#define IXGBE_EIMS_GPI_SDP2 IXGBE_EICR_GPI_SDP2 /* SDP2 Gen Purpose Int */
+#define IXGBE_EIMS_ECC IXGBE_EICR_ECC /* ECC Error */
+#define IXGBE_EIMS_PBUR IXGBE_EICR_PBUR /* Pkt Buf Handler Err */
+#define IXGBE_EIMS_DHER IXGBE_EICR_DHER /* Descr Handler Error */
+#define IXGBE_EIMS_TCP_TIMER IXGBE_EICR_TCP_TIMER /* TCP Timer */
+#define IXGBE_EIMS_OTHER IXGBE_EICR_OTHER /* INT Cause Active */
+
+/* Extended Interrupt Mask Clear */
+#define IXGBE_EIMC_RTX_QUEUE IXGBE_EICR_RTX_QUEUE /* RTx Queue Interrupt */
+#define IXGBE_EIMC_FLOW_DIR IXGBE_EICR_FLOW_DIR /* FDir Exception */
+#define IXGBE_EIMC_RX_MISS IXGBE_EICR_RX_MISS /* Packet Buffer Overrun */
+#define IXGBE_EIMC_PCI IXGBE_EICR_PCI /* PCI Exception */
+#define IXGBE_EIMC_MAILBOX IXGBE_EICR_MAILBOX /* VF to PF Mailbox Int */
+#define IXGBE_EIMC_LSC IXGBE_EICR_LSC /* Link Status Change */
+#define IXGBE_EIMC_MNG IXGBE_EICR_MNG /* MNG Event Interrupt */
+#define IXGBE_EIMC_GPI_SDP0 IXGBE_EICR_GPI_SDP0 /* SDP0 Gen Purpose Int */
+#define IXGBE_EIMC_GPI_SDP1 IXGBE_EICR_GPI_SDP1 /* SDP1 Gen Purpose Int */
+#define IXGBE_EIMC_GPI_SDP2 IXGBE_EICR_GPI_SDP2 /* SDP2 Gen Purpose Int */
+#define IXGBE_EIMC_ECC IXGBE_EICR_ECC /* ECC Error */
+#define IXGBE_EIMC_PBUR IXGBE_EICR_PBUR /* Pkt Buf Handler Err */
+#define IXGBE_EIMC_DHER IXGBE_EICR_DHER /* Desc Handler Err */
+#define IXGBE_EIMC_TCP_TIMER IXGBE_EICR_TCP_TIMER /* TCP Timer */
+#define IXGBE_EIMC_OTHER IXGBE_EICR_OTHER /* INT Cause Active */
+
+#define IXGBE_EIMS_ENABLE_MASK ( \
+ IXGBE_EIMS_RTX_QUEUE | \
+ IXGBE_EIMS_LSC | \
+ IXGBE_EIMS_TCP_TIMER | \
+ IXGBE_EIMS_OTHER)
+
+/* Immediate Interrupt Rx (A.K.A. Low Latency Interrupt) */
+#define IXGBE_IMIR_PORT_IM_EN 0x00010000 /* TCP port enable */
+#define IXGBE_IMIR_PORT_BP 0x00020000 /* TCP port check bypass */
+#define IXGBE_IMIREXT_SIZE_BP 0x00001000 /* Packet size bypass */
+#define IXGBE_IMIREXT_CTRL_URG 0x00002000 /* Check URG bit in header */
+#define IXGBE_IMIREXT_CTRL_ACK 0x00004000 /* Check ACK bit in header */
+#define IXGBE_IMIREXT_CTRL_PSH 0x00008000 /* Check PSH bit in header */
+#define IXGBE_IMIREXT_CTRL_RST 0x00010000 /* Check RST bit in header */
+#define IXGBE_IMIREXT_CTRL_SYN 0x00020000 /* Check SYN bit in header */
+#define IXGBE_IMIREXT_CTRL_FIN 0x00040000 /* Check FIN bit in header */
+#define IXGBE_IMIREXT_CTRL_BP 0x00080000 /* Bypass check of control bits */
+#define IXGBE_IMIR_SIZE_BP_82599 0x00001000 /* Packet size bypass */
+#define IXGBE_IMIR_CTRL_URG_82599 0x00002000 /* Check URG bit in header */
+#define IXGBE_IMIR_CTRL_ACK_82599 0x00004000 /* Check ACK bit in header */
+#define IXGBE_IMIR_CTRL_PSH_82599 0x00008000 /* Check PSH bit in header */
+#define IXGBE_IMIR_CTRL_RST_82599 0x00010000 /* Check RST bit in header */
+#define IXGBE_IMIR_CTRL_SYN_82599 0x00020000 /* Check SYN bit in header */
+#define IXGBE_IMIR_CTRL_FIN_82599 0x00040000 /* Check FIN bit in header */
+#define IXGBE_IMIR_CTRL_BP_82599 0x00080000 /* Bypass chk of ctrl bits */
+#define IXGBE_IMIR_LLI_EN_82599 0x00100000 /* Enables low latency Int */
+#define IXGBE_IMIR_RX_QUEUE_MASK_82599 0x0000007F /* Rx Queue Mask */
+#define IXGBE_IMIR_RX_QUEUE_SHIFT_82599 21 /* Rx Queue Shift */
+#define IXGBE_IMIRVP_PRIORITY_MASK 0x00000007 /* VLAN priority mask */
+#define IXGBE_IMIRVP_PRIORITY_EN 0x00000008 /* VLAN priority enable */
+
+#define IXGBE_MAX_FTQF_FILTERS 128
+#define IXGBE_FTQF_PROTOCOL_MASK 0x00000003
+#define IXGBE_FTQF_PROTOCOL_TCP 0x00000000
+#define IXGBE_FTQF_PROTOCOL_UDP 0x00000001
+#define IXGBE_FTQF_PROTOCOL_SCTP 2
+#define IXGBE_FTQF_PRIORITY_MASK 0x00000007
+#define IXGBE_FTQF_PRIORITY_SHIFT 2
+#define IXGBE_FTQF_POOL_MASK 0x0000003F
+#define IXGBE_FTQF_POOL_SHIFT 8
+#define IXGBE_FTQF_5TUPLE_MASK_MASK 0x0000001F
+#define IXGBE_FTQF_5TUPLE_MASK_SHIFT 25
+#define IXGBE_FTQF_SOURCE_ADDR_MASK 0x1E
+#define IXGBE_FTQF_DEST_ADDR_MASK 0x1D
+#define IXGBE_FTQF_SOURCE_PORT_MASK 0x1B
+#define IXGBE_FTQF_DEST_PORT_MASK 0x17
+#define IXGBE_FTQF_PROTOCOL_COMP_MASK 0x0F
+#define IXGBE_FTQF_POOL_MASK_EN 0x40000000
+#define IXGBE_FTQF_QUEUE_ENABLE 0x80000000
+
+/* Interrupt clear mask */
+#define IXGBE_IRQ_CLEAR_MASK 0xFFFFFFFF
+
+/* Interrupt Vector Allocation Registers */
+#define IXGBE_IVAR_REG_NUM 25
+#define IXGBE_IVAR_REG_NUM_82599 64
+#define IXGBE_IVAR_TXRX_ENTRY 96
+#define IXGBE_IVAR_RX_ENTRY 64
+#define IXGBE_IVAR_RX_QUEUE(_i) (0 + (_i))
+#define IXGBE_IVAR_TX_QUEUE(_i) (64 + (_i))
+#define IXGBE_IVAR_TX_ENTRY 32
+
+#define IXGBE_IVAR_TCP_TIMER_INDEX 96 /* 0 based index */
+#define IXGBE_IVAR_OTHER_CAUSES_INDEX 97 /* 0 based index */
+
+#define IXGBE_MSIX_VECTOR(_i) (0 + (_i))
+
+#define IXGBE_IVAR_ALLOC_VAL 0x80 /* Interrupt Allocation valid */
+
+/* ETYPE Queue Filter/Select Bit Masks */
+#define IXGBE_MAX_ETQF_FILTERS 8
+#define IXGBE_ETQF_FCOE 0x08000000 /* bit 27 */
+#define IXGBE_ETQF_BCN 0x10000000 /* bit 28 */
+#define IXGBE_ETQF_1588 0x40000000 /* bit 30 */
+#define IXGBE_ETQF_FILTER_EN 0x80000000 /* bit 31 */
+#define IXGBE_ETQF_POOL_ENABLE (1 << 26) /* bit 26 */
+
+#define IXGBE_ETQS_RX_QUEUE 0x007F0000 /* bits 22:16 */
+#define IXGBE_ETQS_RX_QUEUE_SHIFT 16
+#define IXGBE_ETQS_LLI 0x20000000 /* bit 29 */
+#define IXGBE_ETQS_QUEUE_EN 0x80000000 /* bit 31 */
+
+/*
+ * ETQF filter list: one static filter per filter consumer. This is
+ * to avoid filter collisions later. Add new filters
+ * here!!
+ *
+ * Current filters:
+ * EAPOL 802.1x (0x888e): Filter 0
+ * FCoE (0x8906): Filter 2
+ * 1588 (0x88f7): Filter 3
+ * FIP (0x8914): Filter 4
+ */
+#define IXGBE_ETQF_FILTER_EAPOL 0
+#define IXGBE_ETQF_FILTER_FCOE 2
+#define IXGBE_ETQF_FILTER_1588 3
+#define IXGBE_ETQF_FILTER_FIP 4
+/* VLAN Control Bit Masks */
+#define IXGBE_VLNCTRL_VET 0x0000FFFF /* bits 0-15 */
+#define IXGBE_VLNCTRL_CFI 0x10000000 /* bit 28 */
+#define IXGBE_VLNCTRL_CFIEN 0x20000000 /* bit 29 */
+#define IXGBE_VLNCTRL_VFE 0x40000000 /* bit 30 */
+#define IXGBE_VLNCTRL_VME 0x80000000 /* bit 31 */
+
+/* VLAN pool filtering masks */
+#define IXGBE_VLVF_VIEN 0x80000000 /* filter is valid */
+#define IXGBE_VLVF_ENTRIES 64
+#define IXGBE_VLVF_VLANID_MASK 0x00000FFF
+/* Per VF Port VLAN insertion rules */
+#define IXGBE_VMVIR_VLANA_DEFAULT 0x40000000 /* Always use default VLAN */
+#define IXGBE_VMVIR_VLANA_NEVER 0x80000000 /* Never insert VLAN tag */
+
+#define IXGBE_ETHERNET_IEEE_VLAN_TYPE 0x8100 /* 802.1q protocol */
+
+/* STATUS Bit Masks */
+#define IXGBE_STATUS_LAN_ID 0x0000000C /* LAN ID */
+#define IXGBE_STATUS_LAN_ID_SHIFT 2 /* LAN ID Shift*/
+#define IXGBE_STATUS_GIO 0x00080000 /* GIO Master Ena Status */
+
+#define IXGBE_STATUS_LAN_ID_0 0x00000000 /* LAN ID 0 */
+#define IXGBE_STATUS_LAN_ID_1 0x00000004 /* LAN ID 1 */
+
+/* ESDP Bit Masks */
+#define IXGBE_ESDP_SDP0 0x00000001 /* SDP0 Data Value */
+#define IXGBE_ESDP_SDP1 0x00000002 /* SDP1 Data Value */
+#define IXGBE_ESDP_SDP2 0x00000004 /* SDP2 Data Value */
+#define IXGBE_ESDP_SDP3 0x00000008 /* SDP3 Data Value */
+#define IXGBE_ESDP_SDP4 0x00000010 /* SDP4 Data Value */
+#define IXGBE_ESDP_SDP5 0x00000020 /* SDP5 Data Value */
+#define IXGBE_ESDP_SDP6 0x00000040 /* SDP6 Data Value */
+#define IXGBE_ESDP_SDP0_DIR 0x00000100 /* SDP0 IO direction */
+#define IXGBE_ESDP_SDP1_DIR 0x00000200 /* SDP1 IO direction */
+#define IXGBE_ESDP_SDP4_DIR 0x00001000 /* SDP4 IO direction */
+#define IXGBE_ESDP_SDP5_DIR 0x00002000 /* SDP5 IO direction */
+#define IXGBE_ESDP_SDP0_NATIVE 0x00010000 /* SDP0 IO mode */
+#define IXGBE_ESDP_SDP1_NATIVE 0x00020000 /* SDP1 IO mode */
+
+
+/* LEDCTL Bit Masks */
+#define IXGBE_LED_IVRT_BASE 0x00000040
+#define IXGBE_LED_BLINK_BASE 0x00000080
+#define IXGBE_LED_MODE_MASK_BASE 0x0000000F
+#define IXGBE_LED_OFFSET(_base, _i) (_base << (8 * (_i)))
+#define IXGBE_LED_MODE_SHIFT(_i) (8*(_i))
+#define IXGBE_LED_IVRT(_i) IXGBE_LED_OFFSET(IXGBE_LED_IVRT_BASE, _i)
+#define IXGBE_LED_BLINK(_i) IXGBE_LED_OFFSET(IXGBE_LED_BLINK_BASE, _i)
+#define IXGBE_LED_MODE_MASK(_i) IXGBE_LED_OFFSET(IXGBE_LED_MODE_MASK_BASE, _i)
+
+/* LED modes */
+#define IXGBE_LED_LINK_UP 0x0
+#define IXGBE_LED_LINK_10G 0x1
+#define IXGBE_LED_MAC 0x2
+#define IXGBE_LED_FILTER 0x3
+#define IXGBE_LED_LINK_ACTIVE 0x4
+#define IXGBE_LED_LINK_1G 0x5
+#define IXGBE_LED_ON 0xE
+#define IXGBE_LED_OFF 0xF
+
+/* AUTOC Bit Masks */
+#define IXGBE_AUTOC_KX4_KX_SUPP_MASK 0xC0000000
+#define IXGBE_AUTOC_KX4_SUPP 0x80000000
+#define IXGBE_AUTOC_KX_SUPP 0x40000000
+#define IXGBE_AUTOC_PAUSE 0x30000000
+#define IXGBE_AUTOC_ASM_PAUSE 0x20000000
+#define IXGBE_AUTOC_SYM_PAUSE 0x10000000
+#define IXGBE_AUTOC_RF 0x08000000
+#define IXGBE_AUTOC_PD_TMR 0x06000000
+#define IXGBE_AUTOC_AN_RX_LOOSE 0x01000000
+#define IXGBE_AUTOC_AN_RX_DRIFT 0x00800000
+#define IXGBE_AUTOC_AN_RX_ALIGN 0x007C0000
+#define IXGBE_AUTOC_FECA 0x00040000
+#define IXGBE_AUTOC_FECR 0x00020000
+#define IXGBE_AUTOC_KR_SUPP 0x00010000
+#define IXGBE_AUTOC_AN_RESTART 0x00001000
+#define IXGBE_AUTOC_FLU 0x00000001
+#define IXGBE_AUTOC_LMS_SHIFT 13
+#define IXGBE_AUTOC_LMS_10G_SERIAL (0x3 << IXGBE_AUTOC_LMS_SHIFT)
+#define IXGBE_AUTOC_LMS_KX4_KX_KR (0x4 << IXGBE_AUTOC_LMS_SHIFT)
+#define IXGBE_AUTOC_LMS_SGMII_1G_100M (0x5 << IXGBE_AUTOC_LMS_SHIFT)
+#define IXGBE_AUTOC_LMS_KX4_KX_KR_1G_AN (0x6 << IXGBE_AUTOC_LMS_SHIFT)
+#define IXGBE_AUTOC_LMS_KX4_KX_KR_SGMII (0x7 << IXGBE_AUTOC_LMS_SHIFT)
+#define IXGBE_AUTOC_LMS_MASK (0x7 << IXGBE_AUTOC_LMS_SHIFT)
+#define IXGBE_AUTOC_LMS_1G_LINK_NO_AN (0x0 << IXGBE_AUTOC_LMS_SHIFT)
+#define IXGBE_AUTOC_LMS_10G_LINK_NO_AN (0x1 << IXGBE_AUTOC_LMS_SHIFT)
+#define IXGBE_AUTOC_LMS_1G_AN (0x2 << IXGBE_AUTOC_LMS_SHIFT)
+#define IXGBE_AUTOC_LMS_KX4_AN (0x4 << IXGBE_AUTOC_LMS_SHIFT)
+#define IXGBE_AUTOC_LMS_KX4_AN_1G_AN (0x6 << IXGBE_AUTOC_LMS_SHIFT)
+#define IXGBE_AUTOC_LMS_ATTACH_TYPE (0x7 << IXGBE_AUTOC_10G_PMA_PMD_SHIFT)
+
+#define IXGBE_AUTOC_1G_PMA_PMD_MASK 0x00000200
+#define IXGBE_AUTOC_1G_PMA_PMD_SHIFT 9
+#define IXGBE_AUTOC_10G_PMA_PMD_MASK 0x00000180
+#define IXGBE_AUTOC_10G_PMA_PMD_SHIFT 7
+#define IXGBE_AUTOC_10G_XAUI (0x0 << IXGBE_AUTOC_10G_PMA_PMD_SHIFT)
+#define IXGBE_AUTOC_10G_KX4 (0x1 << IXGBE_AUTOC_10G_PMA_PMD_SHIFT)
+#define IXGBE_AUTOC_10G_CX4 (0x2 << IXGBE_AUTOC_10G_PMA_PMD_SHIFT)
+#define IXGBE_AUTOC_1G_BX (0x0 << IXGBE_AUTOC_1G_PMA_PMD_SHIFT)
+#define IXGBE_AUTOC_1G_KX (0x1 << IXGBE_AUTOC_1G_PMA_PMD_SHIFT)
+#define IXGBE_AUTOC_1G_SFI (0x0 << IXGBE_AUTOC_1G_PMA_PMD_SHIFT)
+#define IXGBE_AUTOC_1G_KX_BX (0x1 << IXGBE_AUTOC_1G_PMA_PMD_SHIFT)
+
+#define IXGBE_AUTOC2_UPPER_MASK 0xFFFF0000
+#define IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_MASK 0x00030000
+#define IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_SHIFT 16
+#define IXGBE_AUTOC2_10G_KR (0x0 << IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_SHIFT)
+#define IXGBE_AUTOC2_10G_XFI (0x1 << IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_SHIFT)
+#define IXGBE_AUTOC2_10G_SFI (0x2 << IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_SHIFT)
+
+#define IXGBE_MACC_FLU 0x00000001
+#define IXGBE_MACC_FSV_10G 0x00030000
+#define IXGBE_MACC_FS 0x00040000
+#define IXGBE_MAC_RX2TX_LPBK 0x00000002
+
+/* LINKS Bit Masks */
+#define IXGBE_LINKS_KX_AN_COMP 0x80000000
+#define IXGBE_LINKS_UP 0x40000000
+#define IXGBE_LINKS_SPEED 0x20000000
+#define IXGBE_LINKS_MODE 0x18000000
+#define IXGBE_LINKS_RX_MODE 0x06000000
+#define IXGBE_LINKS_TX_MODE 0x01800000
+#define IXGBE_LINKS_XGXS_EN 0x00400000
+#define IXGBE_LINKS_SGMII_EN 0x02000000
+#define IXGBE_LINKS_PCS_1G_EN 0x00200000
+#define IXGBE_LINKS_1G_AN_EN 0x00100000
+#define IXGBE_LINKS_KX_AN_IDLE 0x00080000
+#define IXGBE_LINKS_1G_SYNC 0x00040000
+#define IXGBE_LINKS_10G_ALIGN 0x00020000
+#define IXGBE_LINKS_10G_LANE_SYNC 0x00017000
+#define IXGBE_LINKS_TL_FAULT 0x00001000
+#define IXGBE_LINKS_SIGNAL 0x00000F00
+
+#define IXGBE_LINKS_SPEED_82599 0x30000000
+#define IXGBE_LINKS_SPEED_10G_82599 0x30000000
+#define IXGBE_LINKS_SPEED_1G_82599 0x20000000
+#define IXGBE_LINKS_SPEED_100_82599 0x10000000
+#define IXGBE_LINK_UP_TIME 90 /* 9.0 Seconds */
+#define IXGBE_AUTO_NEG_TIME 45 /* 4.5 Seconds */
+
+#define IXGBE_LINKS2_AN_SUPPORTED 0x00000040
+
+/* PCS1GLSTA Bit Masks */
+#define IXGBE_PCS1GLSTA_LINK_OK 1
+#define IXGBE_PCS1GLSTA_SYNK_OK 0x10
+#define IXGBE_PCS1GLSTA_AN_COMPLETE 0x10000
+#define IXGBE_PCS1GLSTA_AN_PAGE_RX 0x20000
+#define IXGBE_PCS1GLSTA_AN_TIMED_OUT 0x40000
+#define IXGBE_PCS1GLSTA_AN_REMOTE_FAULT 0x80000
+#define IXGBE_PCS1GLSTA_AN_ERROR_RWS 0x100000
+
+#define IXGBE_PCS1GANA_SYM_PAUSE 0x80
+#define IXGBE_PCS1GANA_ASM_PAUSE 0x100
+
+/* PCS1GLCTL Bit Masks */
+#define IXGBE_PCS1GLCTL_AN_1G_TIMEOUT_EN 0x00040000 /* PCS 1G autoneg to en */
+#define IXGBE_PCS1GLCTL_FLV_LINK_UP 1
+#define IXGBE_PCS1GLCTL_FORCE_LINK 0x20
+#define IXGBE_PCS1GLCTL_LOW_LINK_LATCH 0x40
+#define IXGBE_PCS1GLCTL_AN_ENABLE 0x10000
+#define IXGBE_PCS1GLCTL_AN_RESTART 0x20000
+
+/* ANLP1 Bit Masks */
+#define IXGBE_ANLP1_PAUSE 0x0C00
+#define IXGBE_ANLP1_SYM_PAUSE 0x0400
+#define IXGBE_ANLP1_ASM_PAUSE 0x0800
+#define IXGBE_ANLP1_AN_STATE_MASK 0x000f0000
+
+/* SW Semaphore Register bitmasks */
+#define IXGBE_SWSM_SMBI 0x00000001 /* Driver Semaphore bit */
+#define IXGBE_SWSM_SWESMBI 0x00000002 /* FW Semaphore bit */
+#define IXGBE_SWSM_WMNG 0x00000004 /* Wake MNG Clock */
+#define IXGBE_SWFW_REGSMP 0x80000000 /* Register Semaphore bit 31 */
+
+/* SW_FW_SYNC/GSSR definitions */
+#define IXGBE_GSSR_EEP_SM 0x0001
+#define IXGBE_GSSR_PHY0_SM 0x0002
+#define IXGBE_GSSR_PHY1_SM 0x0004
+#define IXGBE_GSSR_MAC_CSR_SM 0x0008
+#define IXGBE_GSSR_FLASH_SM 0x0010
+#define IXGBE_GSSR_SW_MNG_SM 0x0400
+
+/* FW Status register bitmask */
+#define IXGBE_FWSTS_FWRI 0x00000200 /* Firmware Reset Indication */
+
+/* EEC Register */
+#define IXGBE_EEC_SK 0x00000001 /* EEPROM Clock */
+#define IXGBE_EEC_CS 0x00000002 /* EEPROM Chip Select */
+#define IXGBE_EEC_DI 0x00000004 /* EEPROM Data In */
+#define IXGBE_EEC_DO 0x00000008 /* EEPROM Data Out */
+#define IXGBE_EEC_FWE_MASK 0x00000030 /* FLASH Write Enable */
+#define IXGBE_EEC_FWE_DIS 0x00000010 /* Disable FLASH writes */
+#define IXGBE_EEC_FWE_EN 0x00000020 /* Enable FLASH writes */
+#define IXGBE_EEC_FWE_SHIFT 4
+#define IXGBE_EEC_REQ 0x00000040 /* EEPROM Access Request */
+#define IXGBE_EEC_GNT 0x00000080 /* EEPROM Access Grant */
+#define IXGBE_EEC_PRES 0x00000100 /* EEPROM Present */
+#define IXGBE_EEC_ARD 0x00000200 /* EEPROM Auto Read Done */
+#define IXGBE_EEC_FLUP 0x00800000 /* Flash update command */
+#define IXGBE_EEC_SEC1VAL 0x02000000 /* Sector 1 Valid */
+#define IXGBE_EEC_FLUDONE 0x04000000 /* Flash update done */
+/* EEPROM Addressing bits based on type (0-small, 1-large) */
+#define IXGBE_EEC_ADDR_SIZE 0x00000400
+#define IXGBE_EEC_SIZE 0x00007800 /* EEPROM Size */
+#define IXGBE_EERD_MAX_ADDR 0x00003FFF /* EERD alows 14 bits for addr. */
+
+#define IXGBE_EEC_SIZE_SHIFT 11
+#define IXGBE_EEPROM_WORD_SIZE_SHIFT 6
+#define IXGBE_EEPROM_OPCODE_BITS 8
+
+/* Part Number String Length */
+#define IXGBE_PBANUM_LENGTH 11
+
+/* Checksum and EEPROM pointers */
+#define IXGBE_PBANUM_PTR_GUARD 0xFAFA
+#define IXGBE_EEPROM_CHECKSUM 0x3F
+#define IXGBE_EEPROM_SUM 0xBABA
+#define IXGBE_PCIE_ANALOG_PTR 0x03
+#define IXGBE_ATLAS0_CONFIG_PTR 0x04
+#define IXGBE_PHY_PTR 0x04
+#define IXGBE_ATLAS1_CONFIG_PTR 0x05
+#define IXGBE_OPTION_ROM_PTR 0x05
+#define IXGBE_PCIE_GENERAL_PTR 0x06
+#define IXGBE_PCIE_CONFIG0_PTR 0x07
+#define IXGBE_PCIE_CONFIG1_PTR 0x08
+#define IXGBE_CORE0_PTR 0x09
+#define IXGBE_CORE1_PTR 0x0A
+#define IXGBE_MAC0_PTR 0x0B
+#define IXGBE_MAC1_PTR 0x0C
+#define IXGBE_CSR0_CONFIG_PTR 0x0D
+#define IXGBE_CSR1_CONFIG_PTR 0x0E
+#define IXGBE_FW_PTR 0x0F
+#define IXGBE_PBANUM0_PTR 0x15
+#define IXGBE_PBANUM1_PTR 0x16
+#define IXGBE_ALT_MAC_ADDR_PTR 0x37
+#define IXGBE_FREE_SPACE_PTR 0X3E
+
+/* External Thermal Sensor Config */
+#define IXGBE_ETS_CFG 0x26
+#define IXGBE_ETS_LTHRES_DELTA_MASK 0x07C0
+#define IXGBE_ETS_LTHRES_DELTA_SHIFT 6
+#define IXGBE_ETS_TYPE_MASK 0x0038
+#define IXGBE_ETS_TYPE_SHIFT 3
+#define IXGBE_ETS_TYPE_EMC 0x000
+#define IXGBE_ETS_NUM_SENSORS_MASK 0x0007
+#define IXGBE_ETS_DATA_LOC_MASK 0x3C00
+#define IXGBE_ETS_DATA_LOC_SHIFT 10
+#define IXGBE_ETS_DATA_INDEX_MASK 0x0300
+#define IXGBE_ETS_DATA_INDEX_SHIFT 8
+#define IXGBE_ETS_DATA_HTHRESH_MASK 0x00FF
+
+#define IXGBE_SAN_MAC_ADDR_PTR 0x28
+#define IXGBE_DEVICE_CAPS 0x2C
+#define IXGBE_SERIAL_NUMBER_MAC_ADDR 0x11
+#define IXGBE_PCIE_MSIX_82599_CAPS 0x72
+#define IXGBE_MAX_MSIX_VECTORS_82599 0x40
+#define IXGBE_PCIE_MSIX_82598_CAPS 0x62
+#define IXGBE_MAX_MSIX_VECTORS_82598 0x13
+
+/* MSI-X capability fields masks */
+#define IXGBE_PCIE_MSIX_TBL_SZ_MASK 0x7FF
+
+/* Legacy EEPROM word offsets */
+#define IXGBE_ISCSI_BOOT_CAPS 0x0033
+#define IXGBE_ISCSI_SETUP_PORT_0 0x0030
+#define IXGBE_ISCSI_SETUP_PORT_1 0x0034
+
+/* EEPROM Commands - SPI */
+#define IXGBE_EEPROM_MAX_RETRY_SPI 5000 /* Max wait 5ms for RDY signal */
+#define IXGBE_EEPROM_STATUS_RDY_SPI 0x01
+#define IXGBE_EEPROM_READ_OPCODE_SPI 0x03 /* EEPROM read opcode */
+#define IXGBE_EEPROM_WRITE_OPCODE_SPI 0x02 /* EEPROM write opcode */
+#define IXGBE_EEPROM_A8_OPCODE_SPI 0x08 /* opcode bit-3 = addr bit-8 */
+#define IXGBE_EEPROM_WREN_OPCODE_SPI 0x06 /* EEPROM set Write Ena latch */
+/* EEPROM reset Write Enable latch */
+#define IXGBE_EEPROM_WRDI_OPCODE_SPI 0x04
+#define IXGBE_EEPROM_RDSR_OPCODE_SPI 0x05 /* EEPROM read Status reg */
+#define IXGBE_EEPROM_WRSR_OPCODE_SPI 0x01 /* EEPROM write Status reg */
+#define IXGBE_EEPROM_ERASE4K_OPCODE_SPI 0x20 /* EEPROM ERASE 4KB */
+#define IXGBE_EEPROM_ERASE64K_OPCODE_SPI 0xD8 /* EEPROM ERASE 64KB */
+#define IXGBE_EEPROM_ERASE256_OPCODE_SPI 0xDB /* EEPROM ERASE 256B */
+
+/* EEPROM Read Register */
+#define IXGBE_EEPROM_RW_REG_DATA 16 /* data offset in EEPROM read reg */
+#define IXGBE_EEPROM_RW_REG_DONE 2 /* Offset to READ done bit */
+#define IXGBE_EEPROM_RW_REG_START 1 /* First bit to start operation */
+#define IXGBE_EEPROM_RW_ADDR_SHIFT 2 /* Shift to the address bits */
+#define IXGBE_NVM_POLL_WRITE 1 /* Flag for polling for wr complete */
+#define IXGBE_NVM_POLL_READ 0 /* Flag for polling for rd complete */
+
+#define IXGBE_ETH_LENGTH_OF_ADDRESS 6
+
+#define IXGBE_EEPROM_PAGE_SIZE_MAX 128
+#define IXGBE_EEPROM_RD_BUFFER_MAX_COUNT 512 /* words rd in burst */
+#define IXGBE_EEPROM_WR_BUFFER_MAX_COUNT 256 /* words wr in burst */
+
+#ifndef IXGBE_EEPROM_GRANT_ATTEMPTS
+#define IXGBE_EEPROM_GRANT_ATTEMPTS 1000 /* EEPROM attempts to gain grant */
+#endif
+
+#ifndef IXGBE_EERD_EEWR_ATTEMPTS
+/* Number of 5 microseconds we wait for EERD read and
+ * EERW write to complete */
+#define IXGBE_EERD_EEWR_ATTEMPTS 100000
+#endif
+
+#ifndef IXGBE_FLUDONE_ATTEMPTS
+/* # attempts we wait for flush update to complete */
+#define IXGBE_FLUDONE_ATTEMPTS 20000
+#endif
+
+#define IXGBE_PCIE_CTRL2 0x5 /* PCIe Control 2 Offset */
+#define IXGBE_PCIE_CTRL2_DUMMY_ENABLE 0x8 /* Dummy Function Enable */
+#define IXGBE_PCIE_CTRL2_LAN_DISABLE 0x2 /* LAN PCI Disable */
+#define IXGBE_PCIE_CTRL2_DISABLE_SELECT 0x1 /* LAN Disable Select */
+
+#define IXGBE_SAN_MAC_ADDR_PORT0_OFFSET 0x0
+#define IXGBE_SAN_MAC_ADDR_PORT1_OFFSET 0x3
+#define IXGBE_DEVICE_CAPS_ALLOW_ANY_SFP 0x1
+#define IXGBE_DEVICE_CAPS_FCOE_OFFLOADS 0x2
+#define IXGBE_FW_LESM_PARAMETERS_PTR 0x2
+#define IXGBE_FW_LESM_STATE_1 0x1
+#define IXGBE_FW_LESM_STATE_ENABLED 0x8000 /* LESM Enable bit */
+#define IXGBE_FW_PASSTHROUGH_PATCH_CONFIG_PTR 0x4
+#define IXGBE_FW_PATCH_VERSION_4 0x7
+#define IXGBE_FCOE_IBA_CAPS_BLK_PTR 0x33 /* iSCSI/FCOE block */
+#define IXGBE_FCOE_IBA_CAPS_FCOE 0x20 /* FCOE flags */
+#define IXGBE_ISCSI_FCOE_BLK_PTR 0x17 /* iSCSI/FCOE block */
+#define IXGBE_ISCSI_FCOE_FLAGS_OFFSET 0x0 /* FCOE flags */
+#define IXGBE_ISCSI_FCOE_FLAGS_ENABLE 0x1 /* FCOE flags enable bit */
+#define IXGBE_ALT_SAN_MAC_ADDR_BLK_PTR 0x27 /* Alt. SAN MAC block */
+#define IXGBE_ALT_SAN_MAC_ADDR_CAPS_OFFSET 0x0 /* Alt SAN MAC capability */
+#define IXGBE_ALT_SAN_MAC_ADDR_PORT0_OFFSET 0x1 /* Alt SAN MAC 0 offset */
+#define IXGBE_ALT_SAN_MAC_ADDR_PORT1_OFFSET 0x4 /* Alt SAN MAC 1 offset */
+#define IXGBE_ALT_SAN_MAC_ADDR_WWNN_OFFSET 0x7 /* Alt WWNN prefix offset */
+#define IXGBE_ALT_SAN_MAC_ADDR_WWPN_OFFSET 0x8 /* Alt WWPN prefix offset */
+#define IXGBE_ALT_SAN_MAC_ADDR_CAPS_SANMAC 0x0 /* Alt SAN MAC exists */
+#define IXGBE_ALT_SAN_MAC_ADDR_CAPS_ALTWWN 0x1 /* Alt WWN base exists */
+
+#define IXGBE_DEVICE_CAPS_WOL_PORT0_1 0x4 /* WoL supported on ports 0 & 1 */
+#define IXGBE_DEVICE_CAPS_WOL_PORT0 0x8 /* WoL supported on port 0 */
+#define IXGBE_DEVICE_CAPS_WOL_MASK 0xC /* Mask for WoL capabilities */
+
+/* PCI Bus Info */
+#define IXGBE_PCI_DEVICE_STATUS 0xAA
+#define IXGBE_PCI_DEVICE_STATUS_TRANSACTION_PENDING 0x0020
+#define IXGBE_PCI_LINK_STATUS 0xB2
+#define IXGBE_PCI_DEVICE_CONTROL2 0xC8
+#define IXGBE_PCI_LINK_WIDTH 0x3F0
+#define IXGBE_PCI_LINK_WIDTH_1 0x10
+#define IXGBE_PCI_LINK_WIDTH_2 0x20
+#define IXGBE_PCI_LINK_WIDTH_4 0x40
+#define IXGBE_PCI_LINK_WIDTH_8 0x80
+#define IXGBE_PCI_LINK_SPEED 0xF
+#define IXGBE_PCI_LINK_SPEED_2500 0x1
+#define IXGBE_PCI_LINK_SPEED_5000 0x2
+#define IXGBE_PCI_LINK_SPEED_8000 0x3
+#define IXGBE_PCI_HEADER_TYPE_REGISTER 0x0E
+#define IXGBE_PCI_HEADER_TYPE_MULTIFUNC 0x80
+#define IXGBE_PCI_DEVICE_CONTROL2_16ms 0x0005
+
+/* Number of 100 microseconds we wait for PCI Express master disable */
+#define IXGBE_PCI_MASTER_DISABLE_TIMEOUT 800
+
+/* Check whether address is multicast. This is little-endian specific check.*/
+#define IXGBE_IS_MULTICAST(Address) \
+ (bool)(((u8 *)(Address))[0] & ((u8)0x01))
+
+/* Check whether an address is broadcast. */
+#define IXGBE_IS_BROADCAST(Address) \
+ ((((u8 *)(Address))[0] == ((u8)0xff)) && \
+ (((u8 *)(Address))[1] == ((u8)0xff)))
+
+/* RAH */
+#define IXGBE_RAH_VIND_MASK 0x003C0000
+#define IXGBE_RAH_VIND_SHIFT 18
+#define IXGBE_RAH_AV 0x80000000
+#define IXGBE_CLEAR_VMDQ_ALL 0xFFFFFFFF
+
+/* Header split receive */
+#define IXGBE_RFCTL_ISCSI_DIS 0x00000001
+#define IXGBE_RFCTL_ISCSI_DWC_MASK 0x0000003E
+#define IXGBE_RFCTL_ISCSI_DWC_SHIFT 1
+#define IXGBE_RFCTL_RSC_DIS 0x00000010
+#define IXGBE_RFCTL_NFSW_DIS 0x00000040
+#define IXGBE_RFCTL_NFSR_DIS 0x00000080
+#define IXGBE_RFCTL_NFS_VER_MASK 0x00000300
+#define IXGBE_RFCTL_NFS_VER_SHIFT 8
+#define IXGBE_RFCTL_NFS_VER_2 0
+#define IXGBE_RFCTL_NFS_VER_3 1
+#define IXGBE_RFCTL_NFS_VER_4 2
+#define IXGBE_RFCTL_IPV6_DIS 0x00000400
+#define IXGBE_RFCTL_IPV6_XSUM_DIS 0x00000800
+#define IXGBE_RFCTL_IPFRSP_DIS 0x00004000
+#define IXGBE_RFCTL_IPV6_EX_DIS 0x00010000
+#define IXGBE_RFCTL_NEW_IPV6_EXT_DIS 0x00020000
+
+/* Transmit Config masks */
+#define IXGBE_TXDCTL_ENABLE 0x02000000 /* Ena specific Tx Queue */
+#define IXGBE_TXDCTL_SWFLSH 0x04000000 /* Tx Desc. wr-bk flushing */
+#define IXGBE_TXDCTL_WTHRESH_SHIFT 16 /* shift to WTHRESH bits */
+/* Enable short packet padding to 64 bytes */
+#define IXGBE_TX_PAD_ENABLE 0x00000400
+#define IXGBE_JUMBO_FRAME_ENABLE 0x00000004 /* Allow jumbo frames */
+/* This allows for 16K packets + 4k for vlan */
+#define IXGBE_MAX_FRAME_SZ 0x40040000
+
+#define IXGBE_TDWBAL_HEAD_WB_ENABLE 0x1 /* Tx head write-back enable */
+#define IXGBE_TDWBAL_SEQNUM_WB_ENABLE 0x2 /* Tx seq# write-back enable */
+
+/* Receive Config masks */
+#define IXGBE_RXCTRL_RXEN 0x00000001 /* Enable Receiver */
+#define IXGBE_RXCTRL_DMBYPS 0x00000002 /* Desc Monitor Bypass */
+#define IXGBE_RXDCTL_ENABLE 0x02000000 /* Ena specific Rx Queue */
+#define IXGBE_RXDCTL_SWFLSH 0x04000000 /* Rx Desc wr-bk flushing */
+#define IXGBE_RXDCTL_RLPMLMASK 0x00003FFF /* X540 supported only */
+#define IXGBE_RXDCTL_RLPML_EN 0x00008000
+#define IXGBE_RXDCTL_VME 0x40000000 /* VLAN mode enable */
+
+#define IXGBE_TSYNCTXCTL_VALID 0x00000001 /* Tx timestamp valid */
+#define IXGBE_TSYNCTXCTL_ENABLED 0x00000010 /* Tx timestamping enabled */
+
+#define IXGBE_TSYNCRXCTL_VALID 0x00000001 /* Rx timestamp valid */
+#define IXGBE_TSYNCRXCTL_TYPE_MASK 0x0000000E /* Rx type mask */
+#define IXGBE_TSYNCRXCTL_TYPE_L2_V2 0x00
+#define IXGBE_TSYNCRXCTL_TYPE_L4_V1 0x02
+#define IXGBE_TSYNCRXCTL_TYPE_L2_L4_V2 0x04
+#define IXGBE_TSYNCRXCTL_TYPE_EVENT_V2 0x0A
+#define IXGBE_TSYNCRXCTL_ENABLED 0x00000010 /* Rx Timestamping enabled */
+
+#define IXGBE_RXMTRL_V1_CTRLT_MASK 0x000000FF
+#define IXGBE_RXMTRL_V1_SYNC_MSG 0x00
+#define IXGBE_RXMTRL_V1_DELAY_REQ_MSG 0x01
+#define IXGBE_RXMTRL_V1_FOLLOWUP_MSG 0x02
+#define IXGBE_RXMTRL_V1_DELAY_RESP_MSG 0x03
+#define IXGBE_RXMTRL_V1_MGMT_MSG 0x04
+
+#define IXGBE_RXMTRL_V2_MSGID_MASK 0x0000FF00
+#define IXGBE_RXMTRL_V2_SYNC_MSG 0x0000
+#define IXGBE_RXMTRL_V2_DELAY_REQ_MSG 0x0100
+#define IXGBE_RXMTRL_V2_PDELAY_REQ_MSG 0x0200
+#define IXGBE_RXMTRL_V2_PDELAY_RESP_MSG 0x0300
+#define IXGBE_RXMTRL_V2_FOLLOWUP_MSG 0x0800
+#define IXGBE_RXMTRL_V2_DELAY_RESP_MSG 0x0900
+#define IXGBE_RXMTRL_V2_PDELAY_FOLLOWUP_MSG 0x0A00
+#define IXGBE_RXMTRL_V2_ANNOUNCE_MSG 0x0B00
+#define IXGBE_RXMTRL_V2_SIGNALLING_MSG 0x0C00
+#define IXGBE_RXMTRL_V2_MGMT_MSG 0x0D00
+
+#define IXGBE_FCTRL_SBP 0x00000002 /* Store Bad Packet */
+#define IXGBE_FCTRL_MPE 0x00000100 /* Multicast Promiscuous Ena*/
+#define IXGBE_FCTRL_UPE 0x00000200 /* Unicast Promiscuous Ena */
+#define IXGBE_FCTRL_BAM 0x00000400 /* Broadcast Accept Mode */
+#define IXGBE_FCTRL_PMCF 0x00001000 /* Pass MAC Control Frames */
+#define IXGBE_FCTRL_DPF 0x00002000 /* Discard Pause Frame */
+/* Receive Priority Flow Control Enable */
+#define IXGBE_FCTRL_RPFCE 0x00004000
+#define IXGBE_FCTRL_RFCE 0x00008000 /* Receive Flow Control Ena */
+#define IXGBE_MFLCN_PMCF 0x00000001 /* Pass MAC Control Frames */
+#define IXGBE_MFLCN_DPF 0x00000002 /* Discard Pause Frame */
+#define IXGBE_MFLCN_RPFCE 0x00000004 /* Receive Priority FC Enable */
+#define IXGBE_MFLCN_RFCE 0x00000008 /* Receive FC Enable */
+#define IXGBE_MFLCN_RPFCE_MASK 0x00000FF4 /* Rx Priority FC bitmap mask */
+#define IXGBE_MFLCN_RPFCE_SHIFT 4 /* Rx Priority FC bitmap shift */
+
+/* Multiple Receive Queue Control */
+#define IXGBE_MRQC_RSSEN 0x00000001 /* RSS Enable */
+#define IXGBE_MRQC_MRQE_MASK 0xF /* Bits 3:0 */
+#define IXGBE_MRQC_RT8TCEN 0x00000002 /* 8 TC no RSS */
+#define IXGBE_MRQC_RT4TCEN 0x00000003 /* 4 TC no RSS */
+#define IXGBE_MRQC_RTRSS8TCEN 0x00000004 /* 8 TC w/ RSS */
+#define IXGBE_MRQC_RTRSS4TCEN 0x00000005 /* 4 TC w/ RSS */
+#define IXGBE_MRQC_VMDQEN 0x00000008 /* VMDq2 64 pools no RSS */
+#define IXGBE_MRQC_VMDQRSS32EN 0x0000000A /* VMDq2 32 pools w/ RSS */
+#define IXGBE_MRQC_VMDQRSS64EN 0x0000000B /* VMDq2 64 pools w/ RSS */
+#define IXGBE_MRQC_VMDQRT8TCEN 0x0000000C /* VMDq2/RT 16 pool 8 TC */
+#define IXGBE_MRQC_VMDQRT4TCEN 0x0000000D /* VMDq2/RT 32 pool 4 TC */
+#define IXGBE_MRQC_RSS_FIELD_MASK 0xFFFF0000
+#define IXGBE_MRQC_RSS_FIELD_IPV4_TCP 0x00010000
+#define IXGBE_MRQC_RSS_FIELD_IPV4 0x00020000
+#define IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP 0x00040000
+#define IXGBE_MRQC_RSS_FIELD_IPV6_EX 0x00080000
+#define IXGBE_MRQC_RSS_FIELD_IPV6 0x00100000
+#define IXGBE_MRQC_RSS_FIELD_IPV6_TCP 0x00200000
+#define IXGBE_MRQC_RSS_FIELD_IPV4_UDP 0x00400000
+#define IXGBE_MRQC_RSS_FIELD_IPV6_UDP 0x00800000
+#define IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP 0x01000000
+#define IXGBE_MRQC_L3L4TXSWEN 0x00008000
+
+/* Queue Drop Enable */
+#define IXGBE_QDE_ENABLE 0x00000001
+#define IXGBE_QDE_IDX_MASK 0x00007F00
+#define IXGBE_QDE_IDX_SHIFT 8
+#define IXGBE_QDE_WRITE 0x00010000
+#define IXGBE_QDE_READ 0x00020000
+
+#define IXGBE_TXD_POPTS_IXSM 0x01 /* Insert IP checksum */
+#define IXGBE_TXD_POPTS_TXSM 0x02 /* Insert TCP/UDP checksum */
+#define IXGBE_TXD_CMD_EOP 0x01000000 /* End of Packet */
+#define IXGBE_TXD_CMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */
+#define IXGBE_TXD_CMD_IC 0x04000000 /* Insert Checksum */
+#define IXGBE_TXD_CMD_RS 0x08000000 /* Report Status */
+#define IXGBE_TXD_CMD_DEXT 0x20000000 /* Desc extension (0 = legacy) */
+#define IXGBE_TXD_CMD_VLE 0x40000000 /* Add VLAN tag */
+#define IXGBE_TXD_STAT_DD 0x00000001 /* Descriptor Done */
+
+#define IXGBE_RXDADV_IPSEC_STATUS_SECP 0x00020000
+#define IXGBE_RXDADV_IPSEC_ERROR_INVALID_PROTOCOL 0x08000000
+#define IXGBE_RXDADV_IPSEC_ERROR_INVALID_LENGTH 0x10000000
+#define IXGBE_RXDADV_IPSEC_ERROR_AUTH_FAILED 0x18000000
+#define IXGBE_RXDADV_IPSEC_ERROR_BIT_MASK 0x18000000
+/* Multiple Transmit Queue Command Register */
+#define IXGBE_MTQC_RT_ENA 0x1 /* DCB Enable */
+#define IXGBE_MTQC_VT_ENA 0x2 /* VMDQ2 Enable */
+#define IXGBE_MTQC_64Q_1PB 0x0 /* 64 queues 1 pack buffer */
+#define IXGBE_MTQC_32VF 0x8 /* 4 TX Queues per pool w/32VF's */
+#define IXGBE_MTQC_64VF 0x4 /* 2 TX Queues per pool w/64VF's */
+#define IXGBE_MTQC_4TC_4TQ 0x8 /* 4 TC if RT_ENA and VT_ENA */
+#define IXGBE_MTQC_8TC_8TQ 0xC /* 8 TC if RT_ENA or 8 TQ if VT_ENA */
+
+/* Receive Descriptor bit definitions */
+#define IXGBE_RXD_STAT_DD 0x01 /* Descriptor Done */
+#define IXGBE_RXD_STAT_EOP 0x02 /* End of Packet */
+#define IXGBE_RXD_STAT_FLM 0x04 /* FDir Match */
+#define IXGBE_RXD_STAT_VP 0x08 /* IEEE VLAN Packet */
+#define IXGBE_RXDADV_NEXTP_MASK 0x000FFFF0 /* Next Descriptor Index */
+#define IXGBE_RXDADV_NEXTP_SHIFT 0x00000004
+#define IXGBE_RXD_STAT_UDPCS 0x10 /* UDP xsum calculated */
+#define IXGBE_RXD_STAT_L4CS 0x20 /* L4 xsum calculated */
+#define IXGBE_RXD_STAT_IPCS 0x40 /* IP xsum calculated */
+#define IXGBE_RXD_STAT_PIF 0x80 /* passed in-exact filter */
+#define IXGBE_RXD_STAT_CRCV 0x100 /* Speculative CRC Valid */
+#define IXGBE_RXD_STAT_VEXT 0x200 /* 1st VLAN found */
+#define IXGBE_RXD_STAT_UDPV 0x400 /* Valid UDP checksum */
+#define IXGBE_RXD_STAT_DYNINT 0x800 /* Pkt caused INT via DYNINT */
+#define IXGBE_RXD_STAT_LLINT 0x800 /* Pkt caused Low Latency Interrupt */
+#define IXGBE_RXD_STAT_TS 0x10000 /* Time Stamp */
+#define IXGBE_RXD_STAT_SECP 0x20000 /* Security Processing */
+#define IXGBE_RXD_STAT_LB 0x40000 /* Loopback Status */
+#define IXGBE_RXD_STAT_ACK 0x8000 /* ACK Packet indication */
+#define IXGBE_RXD_ERR_CE 0x01 /* CRC Error */
+#define IXGBE_RXD_ERR_LE 0x02 /* Length Error */
+#define IXGBE_RXD_ERR_PE 0x08 /* Packet Error */
+#define IXGBE_RXD_ERR_OSE 0x10 /* Oversize Error */
+#define IXGBE_RXD_ERR_USE 0x20 /* Undersize Error */
+#define IXGBE_RXD_ERR_TCPE 0x40 /* TCP/UDP Checksum Error */
+#define IXGBE_RXD_ERR_IPE 0x80 /* IP Checksum Error */
+#define IXGBE_RXDADV_ERR_MASK 0xfff00000 /* RDESC.ERRORS mask */
+#define IXGBE_RXDADV_ERR_SHIFT 20 /* RDESC.ERRORS shift */
+#define IXGBE_RXDADV_ERR_RXE 0x20000000 /* Any MAC Error */
+#define IXGBE_RXDADV_ERR_FCEOFE 0x80000000 /* FCoEFe/IPE */
+#define IXGBE_RXDADV_ERR_FCERR 0x00700000 /* FCERR/FDIRERR */
+#define IXGBE_RXDADV_ERR_FDIR_LEN 0x00100000 /* FDIR Length error */
+#define IXGBE_RXDADV_ERR_FDIR_DROP 0x00200000 /* FDIR Drop error */
+#define IXGBE_RXDADV_ERR_FDIR_COLL 0x00400000 /* FDIR Collision error */
+#define IXGBE_RXDADV_ERR_HBO 0x00800000 /*Header Buffer Overflow */
+#define IXGBE_RXDADV_ERR_CE 0x01000000 /* CRC Error */
+#define IXGBE_RXDADV_ERR_LE 0x02000000 /* Length Error */
+#define IXGBE_RXDADV_ERR_PE 0x08000000 /* Packet Error */
+#define IXGBE_RXDADV_ERR_OSE 0x10000000 /* Oversize Error */
+#define IXGBE_RXDADV_ERR_USE 0x20000000 /* Undersize Error */
+#define IXGBE_RXDADV_ERR_TCPE 0x40000000 /* TCP/UDP Checksum Error */
+#define IXGBE_RXDADV_ERR_IPE 0x80000000 /* IP Checksum Error */
+#define IXGBE_RXD_VLAN_ID_MASK 0x0FFF /* VLAN ID is in lower 12 bits */
+#define IXGBE_RXD_PRI_MASK 0xE000 /* Priority is in upper 3 bits */
+#define IXGBE_RXD_PRI_SHIFT 13
+#define IXGBE_RXD_CFI_MASK 0x1000 /* CFI is bit 12 */
+#define IXGBE_RXD_CFI_SHIFT 12
+
+#define IXGBE_RXDADV_STAT_DD IXGBE_RXD_STAT_DD /* Done */
+#define IXGBE_RXDADV_STAT_EOP IXGBE_RXD_STAT_EOP /* End of Packet */
+#define IXGBE_RXDADV_STAT_FLM IXGBE_RXD_STAT_FLM /* FDir Match */
+#define IXGBE_RXDADV_STAT_VP IXGBE_RXD_STAT_VP /* IEEE VLAN Pkt */
+#define IXGBE_RXDADV_STAT_MASK 0x000fffff /* Stat/NEXTP: bit 0-19 */
+#define IXGBE_RXDADV_STAT_FCEOFS 0x00000040 /* FCoE EOF/SOF Stat */
+#define IXGBE_RXDADV_STAT_FCSTAT 0x00000030 /* FCoE Pkt Stat */
+#define IXGBE_RXDADV_STAT_FCSTAT_NOMTCH 0x00000000 /* 00: No Ctxt Match */
+#define IXGBE_RXDADV_STAT_FCSTAT_NODDP 0x00000010 /* 01: Ctxt w/o DDP */
+#define IXGBE_RXDADV_STAT_FCSTAT_FCPRSP 0x00000020 /* 10: Recv. FCP_RSP */
+#define IXGBE_RXDADV_STAT_FCSTAT_DDP 0x00000030 /* 11: Ctxt w/ DDP */
+#define IXGBE_RXDADV_STAT_TS 0x00010000 /* IEEE1588 Time Stamp */
+
+/* PSRTYPE bit definitions */
+#define IXGBE_PSRTYPE_TCPHDR 0x00000010
+#define IXGBE_PSRTYPE_UDPHDR 0x00000020
+#define IXGBE_PSRTYPE_IPV4HDR 0x00000100
+#define IXGBE_PSRTYPE_IPV6HDR 0x00000200
+#define IXGBE_PSRTYPE_L2HDR 0x00001000
+
+/* SRRCTL bit definitions */
+#define IXGBE_SRRCTL_BSIZEPKT_SHIFT 10 /* so many KBs */
+#define IXGBE_SRRCTL_RDMTS_SHIFT 22
+#define IXGBE_SRRCTL_RDMTS_MASK 0x01C00000
+#define IXGBE_SRRCTL_DROP_EN 0x10000000
+#define IXGBE_SRRCTL_BSIZEPKT_MASK 0x0000007F
+#define IXGBE_SRRCTL_BSIZEHDR_MASK 0x00003F00
+#define IXGBE_SRRCTL_DESCTYPE_LEGACY 0x00000000
+#define IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF 0x02000000
+#define IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT 0x04000000
+#define IXGBE_SRRCTL_DESCTYPE_HDR_REPLICATION_LARGE_PKT 0x08000000
+#define IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS 0x0A000000
+#define IXGBE_SRRCTL_DESCTYPE_MASK 0x0E000000
+
+#define IXGBE_RXDPS_HDRSTAT_HDRSP 0x00008000
+#define IXGBE_RXDPS_HDRSTAT_HDRLEN_MASK 0x000003FF
+
+#define IXGBE_RXDADV_RSSTYPE_MASK 0x0000000F
+#define IXGBE_RXDADV_PKTTYPE_MASK 0x0000FFF0
+#define IXGBE_RXDADV_PKTTYPE_MASK_EX 0x0001FFF0
+#define IXGBE_RXDADV_HDRBUFLEN_MASK 0x00007FE0
+#define IXGBE_RXDADV_RSCCNT_MASK 0x001E0000
+#define IXGBE_RXDADV_RSCCNT_SHIFT 17
+#define IXGBE_RXDADV_HDRBUFLEN_SHIFT 5
+#define IXGBE_RXDADV_SPLITHEADER_EN 0x00001000
+#define IXGBE_RXDADV_SPH 0x8000
+
+/* RSS Hash results */
+#define IXGBE_RXDADV_RSSTYPE_NONE 0x00000000
+#define IXGBE_RXDADV_RSSTYPE_IPV4_TCP 0x00000001
+#define IXGBE_RXDADV_RSSTYPE_IPV4 0x00000002
+#define IXGBE_RXDADV_RSSTYPE_IPV6_TCP 0x00000003
+#define IXGBE_RXDADV_RSSTYPE_IPV6_EX 0x00000004
+#define IXGBE_RXDADV_RSSTYPE_IPV6 0x00000005
+#define IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX 0x00000006
+#define IXGBE_RXDADV_RSSTYPE_IPV4_UDP 0x00000007
+#define IXGBE_RXDADV_RSSTYPE_IPV6_UDP 0x00000008
+#define IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX 0x00000009
+
+/* RSS Packet Types as indicated in the receive descriptor. */
+#define IXGBE_RXDADV_PKTTYPE_NONE 0x00000000
+#define IXGBE_RXDADV_PKTTYPE_IPV4 0x00000010 /* IPv4 hdr present */
+#define IXGBE_RXDADV_PKTTYPE_IPV4_EX 0x00000020 /* IPv4 hdr + extensions */
+#define IXGBE_RXDADV_PKTTYPE_IPV6 0x00000040 /* IPv6 hdr present */
+#define IXGBE_RXDADV_PKTTYPE_IPV6_EX 0x00000080 /* IPv6 hdr + extensions */
+#define IXGBE_RXDADV_PKTTYPE_TCP 0x00000100 /* TCP hdr present */
+#define IXGBE_RXDADV_PKTTYPE_UDP 0x00000200 /* UDP hdr present */
+#define IXGBE_RXDADV_PKTTYPE_SCTP 0x00000400 /* SCTP hdr present */
+#define IXGBE_RXDADV_PKTTYPE_NFS 0x00000800 /* NFS hdr present */
+#define IXGBE_RXDADV_PKTTYPE_IPSEC_ESP 0x00001000 /* IPSec ESP */
+#define IXGBE_RXDADV_PKTTYPE_IPSEC_AH 0x00002000 /* IPSec AH */
+#define IXGBE_RXDADV_PKTTYPE_LINKSEC 0x00004000 /* LinkSec Encap */
+#define IXGBE_RXDADV_PKTTYPE_ETQF 0x00008000 /* PKTTYPE is ETQF index */
+#define IXGBE_RXDADV_PKTTYPE_ETQF_MASK 0x00000070 /* ETQF has 8 indices */
+#define IXGBE_RXDADV_PKTTYPE_ETQF_SHIFT 4 /* Right-shift 4 bits */
+
+/* Security Processing bit Indication */
+#define IXGBE_RXDADV_LNKSEC_STATUS_SECP 0x00020000
+#define IXGBE_RXDADV_LNKSEC_ERROR_NO_SA_MATCH 0x08000000
+#define IXGBE_RXDADV_LNKSEC_ERROR_REPLAY_ERROR 0x10000000
+#define IXGBE_RXDADV_LNKSEC_ERROR_BIT_MASK 0x18000000
+#define IXGBE_RXDADV_LNKSEC_ERROR_BAD_SIG 0x18000000
+
+/* Masks to determine if packets should be dropped due to frame errors */
+#define IXGBE_RXD_ERR_FRAME_ERR_MASK ( \
+ IXGBE_RXD_ERR_CE | \
+ IXGBE_RXD_ERR_LE | \
+ IXGBE_RXD_ERR_PE | \
+ IXGBE_RXD_ERR_OSE | \
+ IXGBE_RXD_ERR_USE)
+
+#define IXGBE_RXDADV_ERR_FRAME_ERR_MASK ( \
+ IXGBE_RXDADV_ERR_CE | \
+ IXGBE_RXDADV_ERR_LE | \
+ IXGBE_RXDADV_ERR_PE | \
+ IXGBE_RXDADV_ERR_OSE | \
+ IXGBE_RXDADV_ERR_USE)
+
+#define IXGBE_RXDADV_ERR_FRAME_ERR_MASK_82599 IXGBE_RXDADV_ERR_RXE
+
+/* Multicast bit mask */
+#define IXGBE_MCSTCTRL_MFE 0x4
+
+/* Number of Transmit and Receive Descriptors must be a multiple of 8 */
+#define IXGBE_REQ_TX_DESCRIPTOR_MULTIPLE 8
+#define IXGBE_REQ_RX_DESCRIPTOR_MULTIPLE 8
+#define IXGBE_REQ_TX_BUFFER_GRANULARITY 1024
+
+/* Vlan-specific macros */
+#define IXGBE_RX_DESC_SPECIAL_VLAN_MASK 0x0FFF /* VLAN ID in lower 12 bits */
+#define IXGBE_RX_DESC_SPECIAL_PRI_MASK 0xE000 /* Priority in upper 3 bits */
+#define IXGBE_RX_DESC_SPECIAL_PRI_SHIFT 0x000D /* Priority in upper 3 of 16 */
+#define IXGBE_TX_DESC_SPECIAL_PRI_SHIFT IXGBE_RX_DESC_SPECIAL_PRI_SHIFT
+
+/* SR-IOV specific macros */
+#define IXGBE_MBVFICR_INDEX(vf_number) (vf_number >> 4)
+#define IXGBE_MBVFICR(_i) (0x00710 + ((_i) * 4))
+#define IXGBE_VFLRE(_i) (((_i & 1) ? 0x001C0 : 0x00600))
+#define IXGBE_VFLREC(_i) (0x00700 + ((_i) * 4))
+/* Translated register #defines */
+#define IXGBE_PVFCTRL(P) (0x00300 + (4 * (P)))
+#define IXGBE_PVFSTATUS(P) (0x00008 + (0 * (P)))
+#define IXGBE_PVFLINKS(P) (0x042A4 + (0 * (P)))
+#define IXGBE_PVFRTIMER(P) (0x00048 + (0 * (P)))
+#define IXGBE_PVFMAILBOX(P) (0x04C00 + (4 * (P)))
+#define IXGBE_PVFRXMEMWRAP(P) (0x03190 + (0 * (P)))
+#define IXGBE_PVTEICR(P) (0x00B00 + (4 * (P)))
+#define IXGBE_PVTEICS(P) (0x00C00 + (4 * (P)))
+#define IXGBE_PVTEIMS(P) (0x00D00 + (4 * (P)))
+#define IXGBE_PVTEIMC(P) (0x00E00 + (4 * (P)))
+#define IXGBE_PVTEIAC(P) (0x00F00 + (4 * (P)))
+#define IXGBE_PVTEIAM(P) (0x04D00 + (4 * (P)))
+#define IXGBE_PVTEITR(P) (((P) < 24) ? (0x00820 + ((P) * 4)) : \
+ (0x012300 + (((P) - 24) * 4)))
+#define IXGBE_PVTIVAR(P) (0x12500 + (4 * (P)))
+#define IXGBE_PVTIVAR_MISC(P) (0x04E00 + (4 * (P)))
+#define IXGBE_PVTRSCINT(P) (0x12000 + (4 * (P)))
+#define IXGBE_VFPBACL(P) (0x110C8 + (4 * (P)))
+#define IXGBE_PVFRDBAL(P) ((P < 64) ? (0x01000 + (0x40 * (P))) \
+ : (0x0D000 + (0x40 * ((P) - 64))))
+#define IXGBE_PVFRDBAH(P) ((P < 64) ? (0x01004 + (0x40 * (P))) \
+ : (0x0D004 + (0x40 * ((P) - 64))))
+#define IXGBE_PVFRDLEN(P) ((P < 64) ? (0x01008 + (0x40 * (P))) \
+ : (0x0D008 + (0x40 * ((P) - 64))))
+#define IXGBE_PVFRDH(P) ((P < 64) ? (0x01010 + (0x40 * (P))) \
+ : (0x0D010 + (0x40 * ((P) - 64))))
+#define IXGBE_PVFRDT(P) ((P < 64) ? (0x01018 + (0x40 * (P))) \
+ : (0x0D018 + (0x40 * ((P) - 64))))
+#define IXGBE_PVFRXDCTL(P) ((P < 64) ? (0x01028 + (0x40 * (P))) \
+ : (0x0D028 + (0x40 * ((P) - 64))))
+#define IXGBE_PVFSRRCTL(P) ((P < 64) ? (0x01014 + (0x40 * (P))) \
+ : (0x0D014 + (0x40 * ((P) - 64))))
+#define IXGBE_PVFPSRTYPE(P) (0x0EA00 + (4 * (P)))
+#define IXGBE_PVFTDBAL(P) (0x06000 + (0x40 * (P)))
+#define IXGBE_PVFTDBAH(P) (0x06004 + (0x40 * (P)))
+#define IXGBE_PVFTTDLEN(P) (0x06008 + (0x40 * (P)))
+#define IXGBE_PVFTDH(P) (0x06010 + (0x40 * (P)))
+#define IXGBE_PVFTDT(P) (0x06018 + (0x40 * (P)))
+#define IXGBE_PVFTXDCTL(P) (0x06028 + (0x40 * (P)))
+#define IXGBE_PVFTDWBAL(P) (0x06038 + (0x40 * (P)))
+#define IXGBE_PVFTDWBAH(P) (0x0603C + (0x40 * (P)))
+#define IXGBE_PVFDCA_RXCTRL(P) (((P) < 64) ? (0x0100C + (0x40 * (P))) \
+ : (0x0D00C + (0x40 * ((P) - 64))))
+#define IXGBE_PVFDCA_TXCTRL(P) (0x0600C + (0x40 * (P)))
+#define IXGBE_PVFGPRC(x) (0x0101C + (0x40 * (x)))
+#define IXGBE_PVFGPTC(x) (0x08300 + (0x04 * (x)))
+#define IXGBE_PVFGORC_LSB(x) (0x01020 + (0x40 * (x)))
+#define IXGBE_PVFGORC_MSB(x) (0x0D020 + (0x40 * (x)))
+#define IXGBE_PVFGOTC_LSB(x) (0x08400 + (0x08 * (x)))
+#define IXGBE_PVFGOTC_MSB(x) (0x08404 + (0x08 * (x)))
+#define IXGBE_PVFMPRC(x) (0x0D01C + (0x40 * (x)))
+
+#define IXGBE_PVFTDWBALn(q_per_pool, vf_number, vf_q_index) \
+ (IXGBE_PVFTDWBAL((q_per_pool)*(vf_number) + (vf_q_index)))
+#define IXGBE_PVFTDWBAHn(q_per_pool, vf_number, vf_q_index) \
+ (IXGBE_PVFTDWBAH((q_per_pool)*(vf_number) + (vf_q_index)))
+
+/* Little Endian defines */
+#ifndef __le16
+#define __le16 u16
+#endif
+#ifndef __le32
+#define __le32 u32
+#endif
+#ifndef __le64
+#define __le64 u64
+
+#endif
+#ifndef __be16
+/* Big Endian defines */
+#define __be16 u16
+#define __be32 u32
+#define __be64 u64
+
+#endif
+enum ixgbe_fdir_pballoc_type {
+ IXGBE_FDIR_PBALLOC_NONE = 0,
+ IXGBE_FDIR_PBALLOC_64K = 1,
+ IXGBE_FDIR_PBALLOC_128K = 2,
+ IXGBE_FDIR_PBALLOC_256K = 3,
+};
+
+/* Flow Director register values */
+#define IXGBE_FDIRCTRL_PBALLOC_64K 0x00000001
+#define IXGBE_FDIRCTRL_PBALLOC_128K 0x00000002
+#define IXGBE_FDIRCTRL_PBALLOC_256K 0x00000003
+#define IXGBE_FDIRCTRL_INIT_DONE 0x00000008
+#define IXGBE_FDIRCTRL_PERFECT_MATCH 0x00000010
+#define IXGBE_FDIRCTRL_REPORT_STATUS 0x00000020
+#define IXGBE_FDIRCTRL_REPORT_STATUS_ALWAYS 0x00000080
+#define IXGBE_FDIRCTRL_DROP_Q_SHIFT 8
+#define IXGBE_FDIRCTRL_FLEX_SHIFT 16
+#define IXGBE_FDIRCTRL_SEARCHLIM 0x00800000
+#define IXGBE_FDIRCTRL_MAX_LENGTH_SHIFT 24
+#define IXGBE_FDIRCTRL_FULL_THRESH_MASK 0xF0000000
+#define IXGBE_FDIRCTRL_FULL_THRESH_SHIFT 28
+
+#define IXGBE_FDIRTCPM_DPORTM_SHIFT 16
+#define IXGBE_FDIRUDPM_DPORTM_SHIFT 16
+#define IXGBE_FDIRIP6M_DIPM_SHIFT 16
+#define IXGBE_FDIRM_VLANID 0x00000001
+#define IXGBE_FDIRM_VLANP 0x00000002
+#define IXGBE_FDIRM_POOL 0x00000004
+#define IXGBE_FDIRM_L4P 0x00000008
+#define IXGBE_FDIRM_FLEX 0x00000010
+#define IXGBE_FDIRM_DIPv6 0x00000020
+
+#define IXGBE_FDIRFREE_FREE_MASK 0xFFFF
+#define IXGBE_FDIRFREE_FREE_SHIFT 0
+#define IXGBE_FDIRFREE_COLL_MASK 0x7FFF0000
+#define IXGBE_FDIRFREE_COLL_SHIFT 16
+#define IXGBE_FDIRLEN_MAXLEN_MASK 0x3F
+#define IXGBE_FDIRLEN_MAXLEN_SHIFT 0
+#define IXGBE_FDIRLEN_MAXHASH_MASK 0x7FFF0000
+#define IXGBE_FDIRLEN_MAXHASH_SHIFT 16
+#define IXGBE_FDIRUSTAT_ADD_MASK 0xFFFF
+#define IXGBE_FDIRUSTAT_ADD_SHIFT 0
+#define IXGBE_FDIRUSTAT_REMOVE_MASK 0xFFFF0000
+#define IXGBE_FDIRUSTAT_REMOVE_SHIFT 16
+#define IXGBE_FDIRFSTAT_FADD_MASK 0x00FF
+#define IXGBE_FDIRFSTAT_FADD_SHIFT 0
+#define IXGBE_FDIRFSTAT_FREMOVE_MASK 0xFF00
+#define IXGBE_FDIRFSTAT_FREMOVE_SHIFT 8
+#define IXGBE_FDIRPORT_DESTINATION_SHIFT 16
+#define IXGBE_FDIRVLAN_FLEX_SHIFT 16
+#define IXGBE_FDIRHASH_BUCKET_VALID_SHIFT 15
+#define IXGBE_FDIRHASH_SIG_SW_INDEX_SHIFT 16
+
+#define IXGBE_FDIRCMD_CMD_MASK 0x00000003
+#define IXGBE_FDIRCMD_CMD_ADD_FLOW 0x00000001
+#define IXGBE_FDIRCMD_CMD_REMOVE_FLOW 0x00000002
+#define IXGBE_FDIRCMD_CMD_QUERY_REM_FILT 0x00000003
+#define IXGBE_FDIRCMD_FILTER_VALID 0x00000004
+#define IXGBE_FDIRCMD_FILTER_UPDATE 0x00000008
+#define IXGBE_FDIRCMD_IPv6DMATCH 0x00000010
+#define IXGBE_FDIRCMD_L4TYPE_UDP 0x00000020
+#define IXGBE_FDIRCMD_L4TYPE_TCP 0x00000040
+#define IXGBE_FDIRCMD_L4TYPE_SCTP 0x00000060
+#define IXGBE_FDIRCMD_IPV6 0x00000080
+#define IXGBE_FDIRCMD_CLEARHT 0x00000100
+#define IXGBE_FDIRCMD_DROP 0x00000200
+#define IXGBE_FDIRCMD_INT 0x00000400
+#define IXGBE_FDIRCMD_LAST 0x00000800
+#define IXGBE_FDIRCMD_COLLISION 0x00001000
+#define IXGBE_FDIRCMD_QUEUE_EN 0x00008000
+#define IXGBE_FDIRCMD_FLOW_TYPE_SHIFT 5
+#define IXGBE_FDIRCMD_RX_QUEUE_SHIFT 16
+#define IXGBE_FDIRCMD_VT_POOL_SHIFT 24
+#define IXGBE_FDIR_INIT_DONE_POLL 10
+#define IXGBE_FDIRCMD_CMD_POLL 10
+
+#define IXGBE_FDIR_DROP_QUEUE 127
+
+#define IXGBE_STATUS_OVERHEATING_BIT 20 /* STATUS overtemp bit num */
+
+/* Manageablility Host Interface defines */
+#define IXGBE_HI_MAX_BLOCK_BYTE_LENGTH 1792 /* Num of bytes in range */
+#define IXGBE_HI_MAX_BLOCK_DWORD_LENGTH 448 /* Num of dwords in range */
+#define IXGBE_HI_COMMAND_TIMEOUT 500 /* Process HI command limit */
+
+/* CEM Support */
+#define FW_CEM_HDR_LEN 0x4
+#define FW_CEM_CMD_DRIVER_INFO 0xDD
+#define FW_CEM_CMD_DRIVER_INFO_LEN 0x5
+#define FW_CEM_CMD_RESERVED 0X0
+#define FW_CEM_UNUSED_VER 0x0
+#define FW_CEM_MAX_RETRIES 3
+#define FW_CEM_RESP_STATUS_SUCCESS 0x1
+
+/* Host Interface Command Structures */
+
+struct ixgbe_hic_hdr {
+ u8 cmd;
+ u8 buf_len;
+ union {
+ u8 cmd_resv;
+ u8 ret_status;
+ } cmd_or_resp;
+ u8 checksum;
+};
+
+struct ixgbe_hic_drv_info {
+ struct ixgbe_hic_hdr hdr;
+ u8 port_num;
+ u8 ver_sub;
+ u8 ver_build;
+ u8 ver_min;
+ u8 ver_maj;
+ u8 pad; /* end spacing to ensure length is mult. of dword */
+ u16 pad2; /* end spacing to ensure length is mult. of dword2 */
+};
+
+/* Transmit Descriptor - Legacy */
+struct ixgbe_legacy_tx_desc {
+ u64 buffer_addr; /* Address of the descriptor's data buffer */
+ union {
+ __le32 data;
+ struct {
+ __le16 length; /* Data buffer length */
+ u8 cso; /* Checksum offset */
+ u8 cmd; /* Descriptor control */
+ } flags;
+ } lower;
+ union {
+ __le32 data;
+ struct {
+ u8 status; /* Descriptor status */
+ u8 css; /* Checksum start */
+ __le16 vlan;
+ } fields;
+ } upper;
+};
+
+/* Transmit Descriptor - Advanced */
+union ixgbe_adv_tx_desc {
+ struct {
+ __le64 buffer_addr; /* Address of descriptor's data buf */
+ __le32 cmd_type_len;
+ __le32 olinfo_status;
+ } read;
+ struct {
+ __le64 rsvd; /* Reserved */
+ __le32 nxtseq_seed;
+ __le32 status;
+ } wb;
+};
+
+/* Receive Descriptor - Legacy */
+struct ixgbe_legacy_rx_desc {
+ __le64 buffer_addr; /* Address of the descriptor's data buffer */
+ __le16 length; /* Length of data DMAed into data buffer */
+ __le16 csum; /* Packet checksum */
+ u8 status; /* Descriptor status */
+ u8 errors; /* Descriptor Errors */
+ __le16 vlan;
+};
+
+/* Receive Descriptor - Advanced */
+union ixgbe_adv_rx_desc {
+ struct {
+ __le64 pkt_addr; /* Packet buffer address */
+ __le64 hdr_addr; /* Header buffer address */
+ } read;
+ struct {
+ struct {
+ union {
+ __le32 data;
+ struct {
+ __le16 pkt_info; /* RSS, Pkt type */
+ __le16 hdr_info; /* Splithdr, hdrlen */
+ } hs_rss;
+ } lo_dword;
+ union {
+ __le32 rss; /* RSS Hash */
+ struct {
+ __le16 ip_id; /* IP id */
+ __le16 csum; /* Packet Checksum */
+ } csum_ip;
+ } hi_dword;
+ } lower;
+ struct {
+ __le32 status_error; /* ext status/error */
+ __le16 length; /* Packet length */
+ __le16 vlan; /* VLAN tag */
+ } upper;
+ } wb; /* writeback */
+};
+
+/* Context descriptors */
+struct ixgbe_adv_tx_context_desc {
+ __le32 vlan_macip_lens;
+ __le32 seqnum_seed;
+ __le32 type_tucmd_mlhl;
+ __le32 mss_l4len_idx;
+};
+
+/* Adv Transmit Descriptor Config Masks */
+#define IXGBE_ADVTXD_DTALEN_MASK 0x0000FFFF /* Data buf length(bytes) */
+#define IXGBE_ADVTXD_MAC_LINKSEC 0x00040000 /* Insert LinkSec */
+#define IXGBE_ADVTXD_MAC_TSTAMP 0x00080000 /* IEEE1588 time stamp */
+#define IXGBE_ADVTXD_IPSEC_SA_INDEX_MASK 0x000003FF /* IPSec SA index */
+#define IXGBE_ADVTXD_IPSEC_ESP_LEN_MASK 0x000001FF /* IPSec ESP length */
+#define IXGBE_ADVTXD_DTYP_MASK 0x00F00000 /* DTYP mask */
+#define IXGBE_ADVTXD_DTYP_CTXT 0x00200000 /* Adv Context Desc */
+#define IXGBE_ADVTXD_DTYP_DATA 0x00300000 /* Adv Data Descriptor */
+#define IXGBE_ADVTXD_DCMD_EOP IXGBE_TXD_CMD_EOP /* End of Packet */
+#define IXGBE_ADVTXD_DCMD_IFCS IXGBE_TXD_CMD_IFCS /* Insert FCS */
+#define IXGBE_ADVTXD_DCMD_RS IXGBE_TXD_CMD_RS /* Report Status */
+#define IXGBE_ADVTXD_DCMD_DDTYP_ISCSI 0x10000000 /* DDP hdr type or iSCSI */
+#define IXGBE_ADVTXD_DCMD_DEXT IXGBE_TXD_CMD_DEXT /* Desc ext 1=Adv */
+#define IXGBE_ADVTXD_DCMD_VLE IXGBE_TXD_CMD_VLE /* VLAN pkt enable */
+#define IXGBE_ADVTXD_DCMD_TSE 0x80000000 /* TCP Seg enable */
+#define IXGBE_ADVTXD_STAT_DD IXGBE_TXD_STAT_DD /* Descriptor Done */
+#define IXGBE_ADVTXD_STAT_SN_CRC 0x00000002 /* NXTSEQ/SEED pres in WB */
+#define IXGBE_ADVTXD_STAT_RSV 0x0000000C /* STA Reserved */
+#define IXGBE_ADVTXD_IDX_SHIFT 4 /* Adv desc Index shift */
+#define IXGBE_ADVTXD_CC 0x00000080 /* Check Context */
+#define IXGBE_ADVTXD_POPTS_SHIFT 8 /* Adv desc POPTS shift */
+#define IXGBE_ADVTXD_POPTS_IXSM (IXGBE_TXD_POPTS_IXSM << \
+ IXGBE_ADVTXD_POPTS_SHIFT)
+#define IXGBE_ADVTXD_POPTS_TXSM (IXGBE_TXD_POPTS_TXSM << \
+ IXGBE_ADVTXD_POPTS_SHIFT)
+#define IXGBE_ADVTXD_POPTS_ISCO_1ST 0x00000000 /* 1st TSO of iSCSI PDU */
+#define IXGBE_ADVTXD_POPTS_ISCO_MDL 0x00000800 /* Middle TSO of iSCSI PDU */
+#define IXGBE_ADVTXD_POPTS_ISCO_LAST 0x00001000 /* Last TSO of iSCSI PDU */
+/* 1st&Last TSO-full iSCSI PDU */
+#define IXGBE_ADVTXD_POPTS_ISCO_FULL 0x00001800
+#define IXGBE_ADVTXD_POPTS_RSV 0x00002000 /* POPTS Reserved */
+#define IXGBE_ADVTXD_PAYLEN_SHIFT 14 /* Adv desc PAYLEN shift */
+#define IXGBE_ADVTXD_MACLEN_SHIFT 9 /* Adv ctxt desc mac len shift */
+#define IXGBE_ADVTXD_VLAN_SHIFT 16 /* Adv ctxt vlan tag shift */
+#define IXGBE_ADVTXD_TUCMD_IPV4 0x00000400 /* IP Packet Type: 1=IPv4 */
+#define IXGBE_ADVTXD_TUCMD_IPV6 0x00000000 /* IP Packet Type: 0=IPv6 */
+#define IXGBE_ADVTXD_TUCMD_L4T_UDP 0x00000000 /* L4 Packet TYPE of UDP */
+#define IXGBE_ADVTXD_TUCMD_L4T_TCP 0x00000800 /* L4 Packet TYPE of TCP */
+#define IXGBE_ADVTXD_TUCMD_L4T_SCTP 0x00001000 /* L4 Packet TYPE of SCTP */
+#define IXGBE_ADVTXD_TUCMD_MKRREQ 0x00002000 /* req Markers and CRC */
+#define IXGBE_ADVTXD_POPTS_IPSEC 0x00000400 /* IPSec offload request */
+#define IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP 0x00002000 /* IPSec Type ESP */
+#define IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN 0x00004000/* ESP Encrypt Enable */
+#define IXGBE_ADVTXT_TUCMD_FCOE 0x00008000 /* FCoE Frame Type */
+#define IXGBE_ADVTXD_FCOEF_EOF_MASK (0x3 << 10) /* FC EOF index */
+#define IXGBE_ADVTXD_FCOEF_SOF ((1 << 2) << 10) /* FC SOF index */
+#define IXGBE_ADVTXD_FCOEF_PARINC ((1 << 3) << 10) /* Rel_Off in F_CTL */
+#define IXGBE_ADVTXD_FCOEF_ORIE ((1 << 4) << 10) /* Orientation End */
+#define IXGBE_ADVTXD_FCOEF_ORIS ((1 << 5) << 10) /* Orientation Start */
+#define IXGBE_ADVTXD_FCOEF_EOF_N (0x0 << 10) /* 00: EOFn */
+#define IXGBE_ADVTXD_FCOEF_EOF_T (0x1 << 10) /* 01: EOFt */
+#define IXGBE_ADVTXD_FCOEF_EOF_NI (0x2 << 10) /* 10: EOFni */
+#define IXGBE_ADVTXD_FCOEF_EOF_A (0x3 << 10) /* 11: EOFa */
+#define IXGBE_ADVTXD_L4LEN_SHIFT 8 /* Adv ctxt L4LEN shift */
+#define IXGBE_ADVTXD_MSS_SHIFT 16 /* Adv ctxt MSS shift */
+
+/* Autonegotiation advertised speeds */
+typedef u32 ixgbe_autoneg_advertised;
+/* Link speed */
+typedef u32 ixgbe_link_speed;
+#define IXGBE_LINK_SPEED_UNKNOWN 0
+#define IXGBE_LINK_SPEED_100_FULL 0x0008
+#define IXGBE_LINK_SPEED_1GB_FULL 0x0020
+#define IXGBE_LINK_SPEED_10GB_FULL 0x0080
+#define IXGBE_LINK_SPEED_82598_AUTONEG (IXGBE_LINK_SPEED_1GB_FULL | \
+ IXGBE_LINK_SPEED_10GB_FULL)
+#define IXGBE_LINK_SPEED_82599_AUTONEG (IXGBE_LINK_SPEED_100_FULL | \
+ IXGBE_LINK_SPEED_1GB_FULL | \
+ IXGBE_LINK_SPEED_10GB_FULL)
+
+
+/* Physical layer type */
+typedef u32 ixgbe_physical_layer;
+#define IXGBE_PHYSICAL_LAYER_UNKNOWN 0
+#define IXGBE_PHYSICAL_LAYER_10GBASE_T 0x0001
+#define IXGBE_PHYSICAL_LAYER_1000BASE_T 0x0002
+#define IXGBE_PHYSICAL_LAYER_100BASE_TX 0x0004
+#define IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU 0x0008
+#define IXGBE_PHYSICAL_LAYER_10GBASE_LR 0x0010
+#define IXGBE_PHYSICAL_LAYER_10GBASE_LRM 0x0020
+#define IXGBE_PHYSICAL_LAYER_10GBASE_SR 0x0040
+#define IXGBE_PHYSICAL_LAYER_10GBASE_KX4 0x0080
+#define IXGBE_PHYSICAL_LAYER_10GBASE_CX4 0x0100
+#define IXGBE_PHYSICAL_LAYER_1000BASE_KX 0x0200
+#define IXGBE_PHYSICAL_LAYER_1000BASE_BX 0x0400
+#define IXGBE_PHYSICAL_LAYER_10GBASE_KR 0x0800
+#define IXGBE_PHYSICAL_LAYER_10GBASE_XAUI 0x1000
+#define IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA 0x2000
+#define IXGBE_PHYSICAL_LAYER_1000BASE_SX 0x4000
+
+/* Flow Control Data Sheet defined values
+ * Calculation and defines taken from 802.1bb Annex O
+ */
+
+/* BitTimes (BT) conversion */
+#define IXGBE_BT2KB(BT) ((BT + (8 * 1024 - 1)) / (8 * 1024))
+#define IXGBE_B2BT(BT) (BT * 8)
+
+/* Calculate Delay to respond to PFC */
+#define IXGBE_PFC_D 672
+
+/* Calculate Cable Delay */
+#define IXGBE_CABLE_DC 5556 /* Delay Copper */
+#define IXGBE_CABLE_DO 5000 /* Delay Optical */
+
+/* Calculate Interface Delay X540 */
+#define IXGBE_PHY_DC 25600 /* Delay 10G BASET */
+#define IXGBE_MAC_DC 8192 /* Delay Copper XAUI interface */
+#define IXGBE_XAUI_DC (2 * 2048) /* Delay Copper Phy */
+
+#define IXGBE_ID_X540 (IXGBE_MAC_DC + IXGBE_XAUI_DC + IXGBE_PHY_DC)
+
+/* Calculate Interface Delay 82598, 82599 */
+#define IXGBE_PHY_D 12800
+#define IXGBE_MAC_D 4096
+#define IXGBE_XAUI_D (2 * 1024)
+
+#define IXGBE_ID (IXGBE_MAC_D + IXGBE_XAUI_D + IXGBE_PHY_D)
+
+/* Calculate Delay incurred from higher layer */
+#define IXGBE_HD 6144
+
+/* Calculate PCI Bus delay for low thresholds */
+#define IXGBE_PCI_DELAY 10000
+
+/* Calculate X540 delay value in bit times */
+#define IXGBE_DV_X540(_max_frame_link, _max_frame_tc) \
+ ((36 * \
+ (IXGBE_B2BT(_max_frame_link) + \
+ IXGBE_PFC_D + \
+ (2 * IXGBE_CABLE_DC) + \
+ (2 * IXGBE_ID_X540) + \
+ IXGBE_HD) / 25 + 1) + \
+ 2 * IXGBE_B2BT(_max_frame_tc))
+
+/* Calculate 82599, 82598 delay value in bit times */
+#define IXGBE_DV(_max_frame_link, _max_frame_tc) \
+ ((36 * \
+ (IXGBE_B2BT(_max_frame_link) + \
+ IXGBE_PFC_D + \
+ (2 * IXGBE_CABLE_DC) + \
+ (2 * IXGBE_ID) + \
+ IXGBE_HD) / 25 + 1) + \
+ 2 * IXGBE_B2BT(_max_frame_tc))
+
+/* Calculate low threshold delay values */
+#define IXGBE_LOW_DV_X540(_max_frame_tc) \
+ (2 * IXGBE_B2BT(_max_frame_tc) + \
+ (36 * IXGBE_PCI_DELAY / 25) + 1)
+#define IXGBE_LOW_DV(_max_frame_tc) \
+ (2 * IXGBE_LOW_DV_X540(_max_frame_tc))
+
+/* Software ATR hash keys */
+#define IXGBE_ATR_BUCKET_HASH_KEY 0x3DAD14E2
+#define IXGBE_ATR_SIGNATURE_HASH_KEY 0x174D3614
+
+/* Software ATR input stream values and masks */
+#define IXGBE_ATR_HASH_MASK 0x7fff
+#define IXGBE_ATR_L4TYPE_MASK 0x3
+#define IXGBE_ATR_L4TYPE_UDP 0x1
+#define IXGBE_ATR_L4TYPE_TCP 0x2
+#define IXGBE_ATR_L4TYPE_SCTP 0x3
+#define IXGBE_ATR_L4TYPE_IPV6_MASK 0x4
+enum ixgbe_atr_flow_type {
+ IXGBE_ATR_FLOW_TYPE_IPV4 = 0x0,
+ IXGBE_ATR_FLOW_TYPE_UDPV4 = 0x1,
+ IXGBE_ATR_FLOW_TYPE_TCPV4 = 0x2,
+ IXGBE_ATR_FLOW_TYPE_SCTPV4 = 0x3,
+ IXGBE_ATR_FLOW_TYPE_IPV6 = 0x4,
+ IXGBE_ATR_FLOW_TYPE_UDPV6 = 0x5,
+ IXGBE_ATR_FLOW_TYPE_TCPV6 = 0x6,
+ IXGBE_ATR_FLOW_TYPE_SCTPV6 = 0x7,
+};
+
+/* Flow Director ATR input struct. */
+union ixgbe_atr_input {
+ /*
+ * Byte layout in order, all values with MSB first:
+ *
+ * vm_pool - 1 byte
+ * flow_type - 1 byte
+ * vlan_id - 2 bytes
+ * src_ip - 16 bytes
+ * dst_ip - 16 bytes
+ * src_port - 2 bytes
+ * dst_port - 2 bytes
+ * flex_bytes - 2 bytes
+ * bkt_hash - 2 bytes
+ */
+ struct {
+ u8 vm_pool;
+ u8 flow_type;
+ __be16 vlan_id;
+ __be32 dst_ip[4];
+ __be32 src_ip[4];
+ __be16 src_port;
+ __be16 dst_port;
+ __be16 flex_bytes;
+ __be16 bkt_hash;
+ } formatted;
+ __be32 dword_stream[11];
+};
+
+/* Flow Director compressed ATR hash input struct */
+union ixgbe_atr_hash_dword {
+ struct {
+ u8 vm_pool;
+ u8 flow_type;
+ __be16 vlan_id;
+ } formatted;
+ __be32 ip;
+ struct {
+ __be16 src;
+ __be16 dst;
+ } port;
+ __be16 flex_bytes;
+ __be32 dword;
+};
+
+
+/*
+ * Unavailable: The FCoE Boot Option ROM is not present in the flash.
+ * Disabled: Present; boot order is not set for any targets on the port.
+ * Enabled: Present; boot order is set for at least one target on the port.
+ */
+enum ixgbe_fcoe_boot_status {
+ ixgbe_fcoe_bootstatus_disabled = 0,
+ ixgbe_fcoe_bootstatus_enabled = 1,
+ ixgbe_fcoe_bootstatus_unavailable = 0xFFFF
+};
+
+enum ixgbe_eeprom_type {
+ ixgbe_eeprom_uninitialized = 0,
+ ixgbe_eeprom_spi,
+ ixgbe_flash,
+ ixgbe_eeprom_none /* No NVM support */
+};
+
+enum ixgbe_mac_type {
+ ixgbe_mac_unknown = 0,
+ ixgbe_mac_82598EB,
+ ixgbe_mac_82599EB,
+ ixgbe_mac_X540,
+ ixgbe_num_macs
+};
+
+enum ixgbe_phy_type {
+ ixgbe_phy_unknown = 0,
+ ixgbe_phy_none,
+ ixgbe_phy_tn,
+ ixgbe_phy_aq,
+ ixgbe_phy_cu_unknown,
+ ixgbe_phy_qt,
+ ixgbe_phy_xaui,
+ ixgbe_phy_nl,
+ ixgbe_phy_sfp_passive_tyco,
+ ixgbe_phy_sfp_passive_unknown,
+ ixgbe_phy_sfp_active_unknown,
+ ixgbe_phy_sfp_avago,
+ ixgbe_phy_sfp_ftl,
+ ixgbe_phy_sfp_ftl_active,
+ ixgbe_phy_sfp_unknown,
+ ixgbe_phy_sfp_intel,
+ ixgbe_phy_sfp_unsupported, /*Enforce bit set with unsupported module*/
+ ixgbe_phy_generic
+};
+
+/*
+ * SFP+ module type IDs:
+ *
+ * ID Module Type
+ * =============
+ * 0 SFP_DA_CU
+ * 1 SFP_SR
+ * 2 SFP_LR
+ * 3 SFP_DA_CU_CORE0 - 82599-specific
+ * 4 SFP_DA_CU_CORE1 - 82599-specific
+ * 5 SFP_SR/LR_CORE0 - 82599-specific
+ * 6 SFP_SR/LR_CORE1 - 82599-specific
+ */
+enum ixgbe_sfp_type {
+ ixgbe_sfp_type_da_cu = 0,
+ ixgbe_sfp_type_sr = 1,
+ ixgbe_sfp_type_lr = 2,
+ ixgbe_sfp_type_da_cu_core0 = 3,
+ ixgbe_sfp_type_da_cu_core1 = 4,
+ ixgbe_sfp_type_srlr_core0 = 5,
+ ixgbe_sfp_type_srlr_core1 = 6,
+ ixgbe_sfp_type_da_act_lmt_core0 = 7,
+ ixgbe_sfp_type_da_act_lmt_core1 = 8,
+ ixgbe_sfp_type_1g_cu_core0 = 9,
+ ixgbe_sfp_type_1g_cu_core1 = 10,
+ ixgbe_sfp_type_1g_sx_core0 = 11,
+ ixgbe_sfp_type_1g_sx_core1 = 12,
+ ixgbe_sfp_type_not_present = 0xFFFE,
+ ixgbe_sfp_type_unknown = 0xFFFF
+};
+
+enum ixgbe_media_type {
+ ixgbe_media_type_unknown = 0,
+ ixgbe_media_type_fiber,
+ ixgbe_media_type_fiber_qsfp,
+ ixgbe_media_type_fiber_lco,
+ ixgbe_media_type_copper,
+ ixgbe_media_type_backplane,
+ ixgbe_media_type_cx4,
+ ixgbe_media_type_virtual
+};
+
+/* Flow Control Settings */
+enum ixgbe_fc_mode {
+ ixgbe_fc_none = 0,
+ ixgbe_fc_rx_pause,
+ ixgbe_fc_tx_pause,
+ ixgbe_fc_full,
+ ixgbe_fc_default
+};
+
+/* Smart Speed Settings */
+#define IXGBE_SMARTSPEED_MAX_RETRIES 3
+enum ixgbe_smart_speed {
+ ixgbe_smart_speed_auto = 0,
+ ixgbe_smart_speed_on,
+ ixgbe_smart_speed_off
+};
+
+/* PCI bus types */
+enum ixgbe_bus_type {
+ ixgbe_bus_type_unknown = 0,
+ ixgbe_bus_type_pci,
+ ixgbe_bus_type_pcix,
+ ixgbe_bus_type_pci_express,
+ ixgbe_bus_type_reserved
+};
+
+/* PCI bus speeds */
+enum ixgbe_bus_speed {
+ ixgbe_bus_speed_unknown = 0,
+ ixgbe_bus_speed_33 = 33,
+ ixgbe_bus_speed_66 = 66,
+ ixgbe_bus_speed_100 = 100,
+ ixgbe_bus_speed_120 = 120,
+ ixgbe_bus_speed_133 = 133,
+ ixgbe_bus_speed_2500 = 2500,
+ ixgbe_bus_speed_5000 = 5000,
+ ixgbe_bus_speed_8000 = 8000,
+ ixgbe_bus_speed_reserved
+};
+
+/* PCI bus widths */
+enum ixgbe_bus_width {
+ ixgbe_bus_width_unknown = 0,
+ ixgbe_bus_width_pcie_x1 = 1,
+ ixgbe_bus_width_pcie_x2 = 2,
+ ixgbe_bus_width_pcie_x4 = 4,
+ ixgbe_bus_width_pcie_x8 = 8,
+ ixgbe_bus_width_32 = 32,
+ ixgbe_bus_width_64 = 64,
+ ixgbe_bus_width_reserved
+};
+
+struct ixgbe_addr_filter_info {
+ u32 num_mc_addrs;
+ u32 rar_used_count;
+ u32 mta_in_use;
+ u32 overflow_promisc;
+ bool user_set_promisc;
+};
+
+/* Bus parameters */
+struct ixgbe_bus_info {
+ enum ixgbe_bus_speed speed;
+ enum ixgbe_bus_width width;
+ enum ixgbe_bus_type type;
+
+ u16 func;
+ u16 lan_id;
+};
+
+/* Flow control parameters */
+struct ixgbe_fc_info {
+ u32 high_water[IXGBE_DCB_MAX_TRAFFIC_CLASS]; /* Flow Ctrl High-water */
+ u32 low_water[IXGBE_DCB_MAX_TRAFFIC_CLASS]; /* Flow Ctrl Low-water */
+ u16 pause_time; /* Flow Control Pause timer */
+ bool send_xon; /* Flow control send XON */
+ bool strict_ieee; /* Strict IEEE mode */
+ bool disable_fc_autoneg; /* Do not autonegotiate FC */
+ bool fc_was_autonegged; /* Is current_mode the result of autonegging? */
+ enum ixgbe_fc_mode current_mode; /* FC mode in effect */
+ enum ixgbe_fc_mode requested_mode; /* FC mode requested by caller */
+};
+
+/* Statistics counters collected by the MAC */
+struct ixgbe_hw_stats {
+ u64 crcerrs;
+ u64 illerrc;
+ u64 errbc;
+ u64 mspdc;
+ u64 mpctotal;
+ u64 mpc[8];
+ u64 mlfc;
+ u64 mrfc;
+ u64 rlec;
+ u64 lxontxc;
+ u64 lxonrxc;
+ u64 lxofftxc;
+ u64 lxoffrxc;
+ u64 pxontxc[8];
+ u64 pxonrxc[8];
+ u64 pxofftxc[8];
+ u64 pxoffrxc[8];
+ u64 prc64;
+ u64 prc127;
+ u64 prc255;
+ u64 prc511;
+ u64 prc1023;
+ u64 prc1522;
+ u64 gprc;
+ u64 bprc;
+ u64 mprc;
+ u64 gptc;
+ u64 gorc;
+ u64 gotc;
+ u64 rnbc[8];
+ u64 ruc;
+ u64 rfc;
+ u64 roc;
+ u64 rjc;
+ u64 mngprc;
+ u64 mngpdc;
+ u64 mngptc;
+ u64 tor;
+ u64 tpr;
+ u64 tpt;
+ u64 ptc64;
+ u64 ptc127;
+ u64 ptc255;
+ u64 ptc511;
+ u64 ptc1023;
+ u64 ptc1522;
+ u64 mptc;
+ u64 bptc;
+ u64 xec;
+ u64 qprc[16];
+ u64 qptc[16];
+ u64 qbrc[16];
+ u64 qbtc[16];
+ u64 qprdc[16];
+ u64 pxon2offc[8];
+ u64 fdirustat_add;
+ u64 fdirustat_remove;
+ u64 fdirfstat_fadd;
+ u64 fdirfstat_fremove;
+ u64 fdirmatch;
+ u64 fdirmiss;
+ u64 fccrc;
+ u64 fclast;
+ u64 fcoerpdc;
+ u64 fcoeprc;
+ u64 fcoeptc;
+ u64 fcoedwrc;
+ u64 fcoedwtc;
+ u64 fcoe_noddp;
+ u64 fcoe_noddp_ext_buff;
+ u64 ldpcec;
+ u64 pcrc8ec;
+ u64 b2ospc;
+ u64 b2ogprc;
+ u64 o2bgptc;
+ u64 o2bspc;
+};
+
+/* forward declaration */
+struct ixgbe_hw;
+
+/* iterator type for walking multicast address lists */
+typedef u8* (*ixgbe_mc_addr_itr) (struct ixgbe_hw *hw, u8 **mc_addr_ptr,
+ u32 *vmdq);
+
+/* Function pointer table */
+struct ixgbe_eeprom_operations {
+ s32 (*init_params)(struct ixgbe_hw *);
+ s32 (*read)(struct ixgbe_hw *, u16, u16 *);
+ s32 (*read_buffer)(struct ixgbe_hw *, u16, u16, u16 *);
+ s32 (*write)(struct ixgbe_hw *, u16, u16);
+ s32 (*write_buffer)(struct ixgbe_hw *, u16, u16, u16 *);
+ s32 (*validate_checksum)(struct ixgbe_hw *, u16 *);
+ s32 (*update_checksum)(struct ixgbe_hw *);
+ u16 (*calc_checksum)(struct ixgbe_hw *);
+};
+
+struct ixgbe_mac_operations {
+ s32 (*init_hw)(struct ixgbe_hw *);
+ s32 (*reset_hw)(struct ixgbe_hw *);
+ s32 (*start_hw)(struct ixgbe_hw *);
+ s32 (*clear_hw_cntrs)(struct ixgbe_hw *);
+ enum ixgbe_media_type (*get_media_type)(struct ixgbe_hw *);
+ u32 (*get_supported_physical_layer)(struct ixgbe_hw *);
+ s32 (*get_mac_addr)(struct ixgbe_hw *, u8 *);
+ s32 (*get_san_mac_addr)(struct ixgbe_hw *, u8 *);
+ s32 (*set_san_mac_addr)(struct ixgbe_hw *, u8 *);
+ s32 (*get_device_caps)(struct ixgbe_hw *, u16 *);
+ s32 (*get_wwn_prefix)(struct ixgbe_hw *, u16 *, u16 *);
+ s32 (*get_fcoe_boot_status)(struct ixgbe_hw *, u16 *);
+ s32 (*stop_adapter)(struct ixgbe_hw *);
+ s32 (*get_bus_info)(struct ixgbe_hw *);
+ void (*set_lan_id)(struct ixgbe_hw *);
+ s32 (*read_analog_reg8)(struct ixgbe_hw*, u32, u8*);
+ s32 (*write_analog_reg8)(struct ixgbe_hw*, u32, u8);
+ s32 (*setup_sfp)(struct ixgbe_hw *);
+ s32 (*enable_rx_dma)(struct ixgbe_hw *, u32);
+ s32 (*disable_sec_rx_path)(struct ixgbe_hw *);
+ s32 (*enable_sec_rx_path)(struct ixgbe_hw *);
+ s32 (*acquire_swfw_sync)(struct ixgbe_hw *, u16);
+ void (*release_swfw_sync)(struct ixgbe_hw *, u16);
+
+ /* Link */
+ void (*disable_tx_laser)(struct ixgbe_hw *);
+ void (*enable_tx_laser)(struct ixgbe_hw *);
+ void (*flap_tx_laser)(struct ixgbe_hw *);
+ s32 (*setup_link)(struct ixgbe_hw *, ixgbe_link_speed, bool, bool);
+ s32 (*check_link)(struct ixgbe_hw *, ixgbe_link_speed *, bool *, bool);
+ s32 (*get_link_capabilities)(struct ixgbe_hw *, ixgbe_link_speed *,
+ bool *);
+
+ /* Packet Buffer manipulation */
+ void (*setup_rxpba)(struct ixgbe_hw *, int, u32, int);
+
+ /* LED */
+ s32 (*led_on)(struct ixgbe_hw *, u32);
+ s32 (*led_off)(struct ixgbe_hw *, u32);
+ s32 (*blink_led_start)(struct ixgbe_hw *, u32);
+ s32 (*blink_led_stop)(struct ixgbe_hw *, u32);
+
+ /* RAR, Multicast, VLAN */
+ s32 (*set_rar)(struct ixgbe_hw *, u32, u8 *, u32, u32);
+ s32 (*set_uc_addr)(struct ixgbe_hw *, u32, u8 *);
+ s32 (*clear_rar)(struct ixgbe_hw *, u32);
+ s32 (*insert_mac_addr)(struct ixgbe_hw *, u8 *, u32);
+ s32 (*set_vmdq)(struct ixgbe_hw *, u32, u32);
+ s32 (*set_vmdq_san_mac)(struct ixgbe_hw *, u32);
+ s32 (*clear_vmdq)(struct ixgbe_hw *, u32, u32);
+ s32 (*init_rx_addrs)(struct ixgbe_hw *);
+ s32 (*update_uc_addr_list)(struct ixgbe_hw *, u8 *, u32,
+ ixgbe_mc_addr_itr);
+ s32 (*update_mc_addr_list)(struct ixgbe_hw *, u8 *, u32,
+ ixgbe_mc_addr_itr, bool clear);
+ s32 (*enable_mc)(struct ixgbe_hw *);
+ s32 (*disable_mc)(struct ixgbe_hw *);
+ s32 (*clear_vfta)(struct ixgbe_hw *);
+ s32 (*set_vfta)(struct ixgbe_hw *, u32, u32, bool);
+ s32 (*set_vlvf)(struct ixgbe_hw *, u32, u32, bool, bool *);
+ s32 (*init_uta_tables)(struct ixgbe_hw *);
+ void (*set_mac_anti_spoofing)(struct ixgbe_hw *, bool, int);
+ void (*set_vlan_anti_spoofing)(struct ixgbe_hw *, bool, int);
+
+ /* Flow Control */
+ s32 (*fc_enable)(struct ixgbe_hw *);
+
+ /* Manageability interface */
+ s32 (*set_fw_drv_ver)(struct ixgbe_hw *, u8, u8, u8, u8);
+ s32 (*get_thermal_sensor_data)(struct ixgbe_hw *);
+ s32 (*init_thermal_sensor_thresh)(struct ixgbe_hw *hw);
+};
+
+struct ixgbe_phy_operations {
+ s32 (*identify)(struct ixgbe_hw *);
+ s32 (*identify_sfp)(struct ixgbe_hw *);
+ s32 (*init)(struct ixgbe_hw *);
+ s32 (*reset)(struct ixgbe_hw *);
+ s32 (*read_reg)(struct ixgbe_hw *, u32, u32, u16 *);
+ s32 (*write_reg)(struct ixgbe_hw *, u32, u32, u16);
+ s32 (*setup_link)(struct ixgbe_hw *);
+ s32 (*setup_link_speed)(struct ixgbe_hw *, ixgbe_link_speed, bool,
+ bool);
+ s32 (*check_link)(struct ixgbe_hw *, ixgbe_link_speed *, bool *);
+ s32 (*get_firmware_version)(struct ixgbe_hw *, u16 *);
+ s32 (*read_i2c_byte)(struct ixgbe_hw *, u8, u8, u8 *);
+ s32 (*write_i2c_byte)(struct ixgbe_hw *, u8, u8, u8);
+ s32 (*read_i2c_eeprom)(struct ixgbe_hw *, u8 , u8 *);
+ s32 (*write_i2c_eeprom)(struct ixgbe_hw *, u8, u8);
+ void (*i2c_bus_clear)(struct ixgbe_hw *);
+ s32 (*check_overtemp)(struct ixgbe_hw *);
+};
+
+struct ixgbe_eeprom_info {
+ struct ixgbe_eeprom_operations ops;
+ enum ixgbe_eeprom_type type;
+ u32 semaphore_delay;
+ u16 word_size;
+ u16 address_bits;
+ u16 word_page_size;
+};
+
+#define IXGBE_FLAGS_DOUBLE_RESET_REQUIRED 0x01
+struct ixgbe_mac_info {
+ struct ixgbe_mac_operations ops;
+ enum ixgbe_mac_type type;
+ u8 addr[IXGBE_ETH_LENGTH_OF_ADDRESS];
+ u8 perm_addr[IXGBE_ETH_LENGTH_OF_ADDRESS];
+ u8 san_addr[IXGBE_ETH_LENGTH_OF_ADDRESS];
+ /* prefix for World Wide Node Name (WWNN) */
+ u16 wwnn_prefix;
+ /* prefix for World Wide Port Name (WWPN) */
+ u16 wwpn_prefix;
+#define IXGBE_MAX_MTA 128
+ u32 mta_shadow[IXGBE_MAX_MTA];
+ s32 mc_filter_type;
+ u32 mcft_size;
+ u32 vft_size;
+ u32 num_rar_entries;
+ u32 rar_highwater;
+ u32 rx_pb_size;
+ u32 max_tx_queues;
+ u32 max_rx_queues;
+ u32 orig_autoc;
+ u8 san_mac_rar_index;
+ u32 orig_autoc2;
+ u16 max_msix_vectors;
+ bool arc_subsystem_valid;
+ bool orig_link_settings_stored;
+ bool autotry_restart;
+ u8 flags;
+ struct ixgbe_thermal_sensor_data thermal_sensor_data;
+};
+
+struct ixgbe_phy_info {
+ struct ixgbe_phy_operations ops;
+ enum ixgbe_phy_type type;
+ u32 addr;
+ u32 id;
+ enum ixgbe_sfp_type sfp_type;
+ bool sfp_setup_needed;
+ u32 revision;
+ enum ixgbe_media_type media_type;
+ bool reset_disable;
+ ixgbe_autoneg_advertised autoneg_advertised;
+ enum ixgbe_smart_speed smart_speed;
+ bool smart_speed_active;
+ bool multispeed_fiber;
+ bool reset_if_overtemp;
+ bool qsfp_shared_i2c_bus;
+};
+
+#include "ixgbe_mbx.h"
+
+struct ixgbe_mbx_operations {
+ void (*init_params)(struct ixgbe_hw *hw);
+ s32 (*read)(struct ixgbe_hw *, u32 *, u16, u16);
+ s32 (*write)(struct ixgbe_hw *, u32 *, u16, u16);
+ s32 (*read_posted)(struct ixgbe_hw *, u32 *, u16, u16);
+ s32 (*write_posted)(struct ixgbe_hw *, u32 *, u16, u16);
+ s32 (*check_for_msg)(struct ixgbe_hw *, u16);
+ s32 (*check_for_ack)(struct ixgbe_hw *, u16);
+ s32 (*check_for_rst)(struct ixgbe_hw *, u16);
+};
+
+struct ixgbe_mbx_stats {
+ u32 msgs_tx;
+ u32 msgs_rx;
+
+ u32 acks;
+ u32 reqs;
+ u32 rsts;
+};
+
+struct ixgbe_mbx_info {
+ struct ixgbe_mbx_operations ops;
+ struct ixgbe_mbx_stats stats;
+ u32 timeout;
+ u32 udelay;
+ u32 v2p_mailbox;
+ u16 size;
+};
+
+struct ixgbe_hw {
+ u8 __iomem *hw_addr;
+ void *back;
+ struct ixgbe_mac_info mac;
+ struct ixgbe_addr_filter_info addr_ctrl;
+ struct ixgbe_fc_info fc;
+ struct ixgbe_phy_info phy;
+ struct ixgbe_eeprom_info eeprom;
+ struct ixgbe_bus_info bus;
+ struct ixgbe_mbx_info mbx;
+ u16 device_id;
+ u16 vendor_id;
+ u16 subsystem_device_id;
+ u16 subsystem_vendor_id;
+ u8 revision_id;
+ bool adapter_stopped;
+ bool force_full_reset;
+ bool allow_unsupported_sfp;
+};
+
+#define ixgbe_call_func(hw, func, params, error) \
+ (func != NULL) ? func params : error
+
+
+/* Error Codes */
+#define IXGBE_ERR_EEPROM -1
+#define IXGBE_ERR_EEPROM_CHECKSUM -2
+#define IXGBE_ERR_PHY -3
+#define IXGBE_ERR_CONFIG -4
+#define IXGBE_ERR_PARAM -5
+#define IXGBE_ERR_MAC_TYPE -6
+#define IXGBE_ERR_UNKNOWN_PHY -7
+#define IXGBE_ERR_LINK_SETUP -8
+#define IXGBE_ERR_ADAPTER_STOPPED -9
+#define IXGBE_ERR_INVALID_MAC_ADDR -10
+#define IXGBE_ERR_DEVICE_NOT_SUPPORTED -11
+#define IXGBE_ERR_MASTER_REQUESTS_PENDING -12
+#define IXGBE_ERR_INVALID_LINK_SETTINGS -13
+#define IXGBE_ERR_AUTONEG_NOT_COMPLETE -14
+#define IXGBE_ERR_RESET_FAILED -15
+#define IXGBE_ERR_SWFW_SYNC -16
+#define IXGBE_ERR_PHY_ADDR_INVALID -17
+#define IXGBE_ERR_I2C -18
+#define IXGBE_ERR_SFP_NOT_SUPPORTED -19
+#define IXGBE_ERR_SFP_NOT_PRESENT -20
+#define IXGBE_ERR_SFP_NO_INIT_SEQ_PRESENT -21
+#define IXGBE_ERR_NO_SAN_ADDR_PTR -22
+#define IXGBE_ERR_FDIR_REINIT_FAILED -23
+#define IXGBE_ERR_EEPROM_VERSION -24
+#define IXGBE_ERR_NO_SPACE -25
+#define IXGBE_ERR_OVERTEMP -26
+#define IXGBE_ERR_FC_NOT_NEGOTIATED -27
+#define IXGBE_ERR_FC_NOT_SUPPORTED -28
+#define IXGBE_ERR_SFP_SETUP_NOT_COMPLETE -30
+#define IXGBE_ERR_PBA_SECTION -31
+#define IXGBE_ERR_INVALID_ARGUMENT -32
+#define IXGBE_ERR_HOST_INTERFACE_COMMAND -33
+#define IXGBE_ERR_OUT_OF_MEM -34
+
+#define IXGBE_NOT_IMPLEMENTED 0x7FFFFFFF
+
+#define UNREFERENCED_XPARAMETER
+
+#endif /* _IXGBE_TYPE_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.c
new file mode 100644
index 00000000..b99d9e84
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.c
@@ -0,0 +1,937 @@
+/*******************************************************************************
+
+ Intel 10 Gigabit PCI Express Linux driver
+ Copyright(c) 1999 - 2012 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#include "ixgbe_x540.h"
+#include "ixgbe_type.h"
+#include "ixgbe_api.h"
+#include "ixgbe_common.h"
+#include "ixgbe_phy.h"
+
+static s32 ixgbe_update_flash_X540(struct ixgbe_hw *hw);
+static s32 ixgbe_poll_flash_update_done_X540(struct ixgbe_hw *hw);
+static s32 ixgbe_get_swfw_sync_semaphore(struct ixgbe_hw *hw);
+static void ixgbe_release_swfw_sync_semaphore(struct ixgbe_hw *hw);
+
+/**
+ * ixgbe_init_ops_X540 - Inits func ptrs and MAC type
+ * @hw: pointer to hardware structure
+ *
+ * Initialize the function pointers and assign the MAC type for X540.
+ * Does not touch the hardware.
+ **/
+s32 ixgbe_init_ops_X540(struct ixgbe_hw *hw)
+{
+ struct ixgbe_mac_info *mac = &hw->mac;
+ struct ixgbe_phy_info *phy = &hw->phy;
+ struct ixgbe_eeprom_info *eeprom = &hw->eeprom;
+ s32 ret_val;
+
+ ret_val = ixgbe_init_phy_ops_generic(hw);
+ ret_val = ixgbe_init_ops_generic(hw);
+
+
+ /* EEPROM */
+ eeprom->ops.init_params = &ixgbe_init_eeprom_params_X540;
+ eeprom->ops.read = &ixgbe_read_eerd_X540;
+ eeprom->ops.read_buffer = &ixgbe_read_eerd_buffer_X540;
+ eeprom->ops.write = &ixgbe_write_eewr_X540;
+ eeprom->ops.write_buffer = &ixgbe_write_eewr_buffer_X540;
+ eeprom->ops.update_checksum = &ixgbe_update_eeprom_checksum_X540;
+ eeprom->ops.validate_checksum = &ixgbe_validate_eeprom_checksum_X540;
+ eeprom->ops.calc_checksum = &ixgbe_calc_eeprom_checksum_X540;
+
+ /* PHY */
+ phy->ops.init = &ixgbe_init_phy_ops_generic;
+ phy->ops.reset = NULL;
+
+ /* MAC */
+ mac->ops.reset_hw = &ixgbe_reset_hw_X540;
+ mac->ops.get_media_type = &ixgbe_get_media_type_X540;
+ mac->ops.get_supported_physical_layer =
+ &ixgbe_get_supported_physical_layer_X540;
+ mac->ops.read_analog_reg8 = NULL;
+ mac->ops.write_analog_reg8 = NULL;
+ mac->ops.start_hw = &ixgbe_start_hw_X540;
+ mac->ops.get_san_mac_addr = &ixgbe_get_san_mac_addr_generic;
+ mac->ops.set_san_mac_addr = &ixgbe_set_san_mac_addr_generic;
+ mac->ops.get_device_caps = &ixgbe_get_device_caps_generic;
+ mac->ops.get_wwn_prefix = &ixgbe_get_wwn_prefix_generic;
+ mac->ops.get_fcoe_boot_status = &ixgbe_get_fcoe_boot_status_generic;
+ mac->ops.acquire_swfw_sync = &ixgbe_acquire_swfw_sync_X540;
+ mac->ops.release_swfw_sync = &ixgbe_release_swfw_sync_X540;
+ mac->ops.disable_sec_rx_path = &ixgbe_disable_sec_rx_path_generic;
+ mac->ops.enable_sec_rx_path = &ixgbe_enable_sec_rx_path_generic;
+
+ /* RAR, Multicast, VLAN */
+ mac->ops.set_vmdq = &ixgbe_set_vmdq_generic;
+ mac->ops.set_vmdq_san_mac = &ixgbe_set_vmdq_san_mac_generic;
+ mac->ops.clear_vmdq = &ixgbe_clear_vmdq_generic;
+ mac->ops.insert_mac_addr = &ixgbe_insert_mac_addr_generic;
+ mac->rar_highwater = 1;
+ mac->ops.set_vfta = &ixgbe_set_vfta_generic;
+ mac->ops.set_vlvf = &ixgbe_set_vlvf_generic;
+ mac->ops.clear_vfta = &ixgbe_clear_vfta_generic;
+ mac->ops.init_uta_tables = &ixgbe_init_uta_tables_generic;
+ mac->ops.set_mac_anti_spoofing = &ixgbe_set_mac_anti_spoofing;
+ mac->ops.set_vlan_anti_spoofing = &ixgbe_set_vlan_anti_spoofing;
+
+ /* Link */
+ mac->ops.get_link_capabilities =
+ &ixgbe_get_copper_link_capabilities_generic;
+ mac->ops.setup_link = &ixgbe_setup_mac_link_X540;
+ mac->ops.setup_rxpba = &ixgbe_set_rxpba_generic;
+ mac->ops.check_link = &ixgbe_check_mac_link_generic;
+
+ mac->mcft_size = 128;
+ mac->vft_size = 128;
+ mac->num_rar_entries = 128;
+ mac->rx_pb_size = 384;
+ mac->max_tx_queues = 128;
+ mac->max_rx_queues = 128;
+ mac->max_msix_vectors = ixgbe_get_pcie_msix_count_generic(hw);
+
+ /*
+ * FWSM register
+ * ARC supported; valid only if manageability features are
+ * enabled.
+ */
+ mac->arc_subsystem_valid = (IXGBE_READ_REG(hw, IXGBE_FWSM) &
+ IXGBE_FWSM_MODE_MASK) ? true : false;
+
+ //hw->mbx.ops.init_params = ixgbe_init_mbx_params_pf;
+
+ /* LEDs */
+ mac->ops.blink_led_start = ixgbe_blink_led_start_X540;
+ mac->ops.blink_led_stop = ixgbe_blink_led_stop_X540;
+
+ /* Manageability interface */
+ mac->ops.set_fw_drv_ver = &ixgbe_set_fw_drv_ver_generic;
+
+ return ret_val;
+}
+
+/**
+ * ixgbe_get_link_capabilities_X540 - Determines link capabilities
+ * @hw: pointer to hardware structure
+ * @speed: pointer to link speed
+ * @autoneg: true when autoneg or autotry is enabled
+ *
+ * Determines the link capabilities by reading the AUTOC register.
+ **/
+s32 ixgbe_get_link_capabilities_X540(struct ixgbe_hw *hw,
+ ixgbe_link_speed *speed,
+ bool *autoneg)
+{
+ ixgbe_get_copper_link_capabilities_generic(hw, speed, autoneg);
+
+ return 0;
+}
+
+/**
+ * ixgbe_get_media_type_X540 - Get media type
+ * @hw: pointer to hardware structure
+ *
+ * Returns the media type (fiber, copper, backplane)
+ **/
+enum ixgbe_media_type ixgbe_get_media_type_X540(struct ixgbe_hw *hw)
+{
+ return ixgbe_media_type_copper;
+}
+
+/**
+ * ixgbe_setup_mac_link_X540 - Sets the auto advertised capabilities
+ * @hw: pointer to hardware structure
+ * @speed: new link speed
+ * @autoneg: true if autonegotiation enabled
+ * @autoneg_wait_to_complete: true when waiting for completion is needed
+ **/
+s32 ixgbe_setup_mac_link_X540(struct ixgbe_hw *hw,
+ ixgbe_link_speed speed, bool autoneg,
+ bool autoneg_wait_to_complete)
+{
+ return hw->phy.ops.setup_link_speed(hw, speed, autoneg,
+ autoneg_wait_to_complete);
+}
+
+/**
+ * ixgbe_reset_hw_X540 - Perform hardware reset
+ * @hw: pointer to hardware structure
+ *
+ * Resets the hardware by resetting the transmit and receive units, masks
+ * and clears all interrupts, and perform a reset.
+ **/
+s32 ixgbe_reset_hw_X540(struct ixgbe_hw *hw)
+{
+ s32 status = 0;
+
+ /*
+ * Userland DPDK takes the ownershiop of device
+ * Kernel driver here used as the simple path for ethtool only
+ * Won't real reset device anyway
+ */
+#if 0
+ u32 ctrl, i;
+
+ /* Call adapter stop to disable tx/rx and clear interrupts */
+ status = hw->mac.ops.stop_adapter(hw);
+ if (status != 0)
+ goto reset_hw_out;
+
+ /* flush pending Tx transactions */
+ ixgbe_clear_tx_pending(hw);
+
+mac_reset_top:
+ ctrl = IXGBE_CTRL_RST;
+ ctrl |= IXGBE_READ_REG(hw, IXGBE_CTRL);
+ IXGBE_WRITE_REG(hw, IXGBE_CTRL, ctrl);
+ IXGBE_WRITE_FLUSH(hw);
+
+ /* Poll for reset bit to self-clear indicating reset is complete */
+ for (i = 0; i < 10; i++) {
+ udelay(1);
+ ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL);
+ if (!(ctrl & IXGBE_CTRL_RST_MASK))
+ break;
+ }
+
+ if (ctrl & IXGBE_CTRL_RST_MASK) {
+ status = IXGBE_ERR_RESET_FAILED;
+ hw_dbg(hw, "Reset polling failed to complete.\n");
+ }
+ msleep(100);
+
+ /*
+ * Double resets are required for recovery from certain error
+ * conditions. Between resets, it is necessary to stall to allow time
+ * for any pending HW events to complete.
+ */
+ if (hw->mac.flags & IXGBE_FLAGS_DOUBLE_RESET_REQUIRED) {
+ hw->mac.flags &= ~IXGBE_FLAGS_DOUBLE_RESET_REQUIRED;
+ goto mac_reset_top;
+ }
+
+ /* Set the Rx packet buffer size. */
+ IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(0), 384 << IXGBE_RXPBSIZE_SHIFT);
+
+#endif
+
+ /* Store the permanent mac address */
+ hw->mac.ops.get_mac_addr(hw, hw->mac.perm_addr);
+
+ /*
+ * Store MAC address from RAR0, clear receive address registers, and
+ * clear the multicast table. Also reset num_rar_entries to 128,
+ * since we modify this value when programming the SAN MAC address.
+ */
+ hw->mac.num_rar_entries = 128;
+ hw->mac.ops.init_rx_addrs(hw);
+
+ /* Store the permanent SAN mac address */
+ hw->mac.ops.get_san_mac_addr(hw, hw->mac.san_addr);
+
+ /* Add the SAN MAC address to the RAR only if it's a valid address */
+ if (ixgbe_validate_mac_addr(hw->mac.san_addr) == 0) {
+ hw->mac.ops.set_rar(hw, hw->mac.num_rar_entries - 1,
+ hw->mac.san_addr, 0, IXGBE_RAH_AV);
+
+ /* Save the SAN MAC RAR index */
+ hw->mac.san_mac_rar_index = hw->mac.num_rar_entries - 1;
+
+ /* Reserve the last RAR for the SAN MAC address */
+ hw->mac.num_rar_entries--;
+ }
+
+ /* Store the alternative WWNN/WWPN prefix */
+ hw->mac.ops.get_wwn_prefix(hw, &hw->mac.wwnn_prefix,
+ &hw->mac.wwpn_prefix);
+
+//reset_hw_out:
+ return status;
+}
+
+/**
+ * ixgbe_start_hw_X540 - Prepare hardware for Tx/Rx
+ * @hw: pointer to hardware structure
+ *
+ * Starts the hardware using the generic start_hw function
+ * and the generation start_hw function.
+ * Then performs revision-specific operations, if any.
+ **/
+s32 ixgbe_start_hw_X540(struct ixgbe_hw *hw)
+{
+ s32 ret_val = 0;
+
+ ret_val = ixgbe_start_hw_generic(hw);
+ if (ret_val != 0)
+ goto out;
+
+ ret_val = ixgbe_start_hw_gen2(hw);
+
+out:
+ return ret_val;
+}
+
+/**
+ * ixgbe_get_supported_physical_layer_X540 - Returns physical layer type
+ * @hw: pointer to hardware structure
+ *
+ * Determines physical layer capabilities of the current configuration.
+ **/
+u32 ixgbe_get_supported_physical_layer_X540(struct ixgbe_hw *hw)
+{
+ u32 physical_layer = IXGBE_PHYSICAL_LAYER_UNKNOWN;
+ u16 ext_ability = 0;
+
+ hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_EXT_ABILITY,
+ IXGBE_MDIO_PMA_PMD_DEV_TYPE, &ext_ability);
+ if (ext_ability & IXGBE_MDIO_PHY_10GBASET_ABILITY)
+ physical_layer |= IXGBE_PHYSICAL_LAYER_10GBASE_T;
+ if (ext_ability & IXGBE_MDIO_PHY_1000BASET_ABILITY)
+ physical_layer |= IXGBE_PHYSICAL_LAYER_1000BASE_T;
+ if (ext_ability & IXGBE_MDIO_PHY_100BASETX_ABILITY)
+ physical_layer |= IXGBE_PHYSICAL_LAYER_100BASE_TX;
+
+ return physical_layer;
+}
+
+/**
+ * ixgbe_init_eeprom_params_X540 - Initialize EEPROM params
+ * @hw: pointer to hardware structure
+ *
+ * Initializes the EEPROM parameters ixgbe_eeprom_info within the
+ * ixgbe_hw struct in order to set up EEPROM access.
+ **/
+s32 ixgbe_init_eeprom_params_X540(struct ixgbe_hw *hw)
+{
+ struct ixgbe_eeprom_info *eeprom = &hw->eeprom;
+ u32 eec;
+ u16 eeprom_size;
+
+ if (eeprom->type == ixgbe_eeprom_uninitialized) {
+ eeprom->semaphore_delay = 10;
+ eeprom->type = ixgbe_flash;
+
+ eec = IXGBE_READ_REG(hw, IXGBE_EEC);
+ eeprom_size = (u16)((eec & IXGBE_EEC_SIZE) >>
+ IXGBE_EEC_SIZE_SHIFT);
+ eeprom->word_size = 1 << (eeprom_size +
+ IXGBE_EEPROM_WORD_SIZE_SHIFT);
+
+ hw_dbg(hw, "Eeprom params: type = %d, size = %d\n",
+ eeprom->type, eeprom->word_size);
+ }
+
+ return 0;
+}
+
+/**
+ * ixgbe_read_eerd_X540- Read EEPROM word using EERD
+ * @hw: pointer to hardware structure
+ * @offset: offset of word in the EEPROM to read
+ * @data: word read from the EEPROM
+ *
+ * Reads a 16 bit word from the EEPROM using the EERD register.
+ **/
+s32 ixgbe_read_eerd_X540(struct ixgbe_hw *hw, u16 offset, u16 *data)
+{
+ s32 status = 0;
+
+ if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM) ==
+ 0)
+ status = ixgbe_read_eerd_generic(hw, offset, data);
+ else
+ status = IXGBE_ERR_SWFW_SYNC;
+
+ hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM);
+ return status;
+}
+
+/**
+ * ixgbe_read_eerd_buffer_X540- Read EEPROM word(s) using EERD
+ * @hw: pointer to hardware structure
+ * @offset: offset of word in the EEPROM to read
+ * @words: number of words
+ * @data: word(s) read from the EEPROM
+ *
+ * Reads a 16 bit word(s) from the EEPROM using the EERD register.
+ **/
+s32 ixgbe_read_eerd_buffer_X540(struct ixgbe_hw *hw,
+ u16 offset, u16 words, u16 *data)
+{
+ s32 status = 0;
+
+ if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM) ==
+ 0)
+ status = ixgbe_read_eerd_buffer_generic(hw, offset,
+ words, data);
+ else
+ status = IXGBE_ERR_SWFW_SYNC;
+
+ hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM);
+ return status;
+}
+
+/**
+ * ixgbe_write_eewr_X540 - Write EEPROM word using EEWR
+ * @hw: pointer to hardware structure
+ * @offset: offset of word in the EEPROM to write
+ * @data: word write to the EEPROM
+ *
+ * Write a 16 bit word to the EEPROM using the EEWR register.
+ **/
+s32 ixgbe_write_eewr_X540(struct ixgbe_hw *hw, u16 offset, u16 data)
+{
+ s32 status = 0;
+
+ if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM) ==
+ 0)
+ status = ixgbe_write_eewr_generic(hw, offset, data);
+ else
+ status = IXGBE_ERR_SWFW_SYNC;
+
+ hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM);
+ return status;
+}
+
+/**
+ * ixgbe_write_eewr_buffer_X540 - Write EEPROM word(s) using EEWR
+ * @hw: pointer to hardware structure
+ * @offset: offset of word in the EEPROM to write
+ * @words: number of words
+ * @data: word(s) write to the EEPROM
+ *
+ * Write a 16 bit word(s) to the EEPROM using the EEWR register.
+ **/
+s32 ixgbe_write_eewr_buffer_X540(struct ixgbe_hw *hw,
+ u16 offset, u16 words, u16 *data)
+{
+ s32 status = 0;
+
+ if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM) ==
+ 0)
+ status = ixgbe_write_eewr_buffer_generic(hw, offset,
+ words, data);
+ else
+ status = IXGBE_ERR_SWFW_SYNC;
+
+ hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM);
+ return status;
+}
+
+/**
+ * ixgbe_calc_eeprom_checksum_X540 - Calculates and returns the checksum
+ *
+ * This function does not use synchronization for EERD and EEWR. It can
+ * be used internally by function which utilize ixgbe_acquire_swfw_sync_X540.
+ *
+ * @hw: pointer to hardware structure
+ **/
+u16 ixgbe_calc_eeprom_checksum_X540(struct ixgbe_hw *hw)
+{
+ u16 i;
+ u16 j;
+ u16 checksum = 0;
+ u16 length = 0;
+ u16 pointer = 0;
+ u16 word = 0;
+
+ /*
+ * Do not use hw->eeprom.ops.read because we do not want to take
+ * the synchronization semaphores here. Instead use
+ * ixgbe_read_eerd_generic
+ */
+
+ /* Include 0x0-0x3F in the checksum */
+ for (i = 0; i < IXGBE_EEPROM_CHECKSUM; i++) {
+ if (ixgbe_read_eerd_generic(hw, i, &word) != 0) {
+ hw_dbg(hw, "EEPROM read failed\n");
+ break;
+ }
+ checksum += word;
+ }
+
+ /*
+ * Include all data from pointers 0x3, 0x6-0xE. This excludes the
+ * FW, PHY module, and PCIe Expansion/Option ROM pointers.
+ */
+ for (i = IXGBE_PCIE_ANALOG_PTR; i < IXGBE_FW_PTR; i++) {
+ if (i == IXGBE_PHY_PTR || i == IXGBE_OPTION_ROM_PTR)
+ continue;
+
+ if (ixgbe_read_eerd_generic(hw, i, &pointer) != 0) {
+ hw_dbg(hw, "EEPROM read failed\n");
+ break;
+ }
+
+ /* Skip pointer section if the pointer is invalid. */
+ if (pointer == 0xFFFF || pointer == 0 ||
+ pointer >= hw->eeprom.word_size)
+ continue;
+
+ if (ixgbe_read_eerd_generic(hw, pointer, &length) !=
+ 0) {
+ hw_dbg(hw, "EEPROM read failed\n");
+ break;
+ }
+
+ /* Skip pointer section if length is invalid. */
+ if (length == 0xFFFF || length == 0 ||
+ (pointer + length) >= hw->eeprom.word_size)
+ continue;
+
+ for (j = pointer+1; j <= pointer+length; j++) {
+ if (ixgbe_read_eerd_generic(hw, j, &word) !=
+ 0) {
+ hw_dbg(hw, "EEPROM read failed\n");
+ break;
+ }
+ checksum += word;
+ }
+ }
+
+ checksum = (u16)IXGBE_EEPROM_SUM - checksum;
+
+ return checksum;
+}
+
+/**
+ * ixgbe_validate_eeprom_checksum_X540 - Validate EEPROM checksum
+ * @hw: pointer to hardware structure
+ * @checksum_val: calculated checksum
+ *
+ * Performs checksum calculation and validates the EEPROM checksum. If the
+ * caller does not need checksum_val, the value can be NULL.
+ **/
+s32 ixgbe_validate_eeprom_checksum_X540(struct ixgbe_hw *hw,
+ u16 *checksum_val)
+{
+ s32 status;
+ u16 checksum;
+ u16 read_checksum = 0;
+
+ /*
+ * Read the first word from the EEPROM. If this times out or fails, do
+ * not continue or we could be in for a very long wait while every
+ * EEPROM read fails
+ */
+ status = hw->eeprom.ops.read(hw, 0, &checksum);
+
+ if (status != 0) {
+ hw_dbg(hw, "EEPROM read failed\n");
+ goto out;
+ }
+
+ if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM) ==
+ 0) {
+ checksum = hw->eeprom.ops.calc_checksum(hw);
+
+ /*
+ * Do not use hw->eeprom.ops.read because we do not want to take
+ * the synchronization semaphores twice here.
+ */
+ ixgbe_read_eerd_generic(hw, IXGBE_EEPROM_CHECKSUM,
+ &read_checksum);
+
+ /*
+ * Verify read checksum from EEPROM is the same as
+ * calculated checksum
+ */
+ if (read_checksum != checksum)
+ status = IXGBE_ERR_EEPROM_CHECKSUM;
+
+ /* If the user cares, return the calculated checksum */
+ if (checksum_val)
+ *checksum_val = checksum;
+ } else {
+ status = IXGBE_ERR_SWFW_SYNC;
+ }
+
+ hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM);
+out:
+ return status;
+}
+
+/**
+ * ixgbe_update_eeprom_checksum_X540 - Updates the EEPROM checksum and flash
+ * @hw: pointer to hardware structure
+ *
+ * After writing EEPROM to shadow RAM using EEWR register, software calculates
+ * checksum and updates the EEPROM and instructs the hardware to update
+ * the flash.
+ **/
+s32 ixgbe_update_eeprom_checksum_X540(struct ixgbe_hw *hw)
+{
+ s32 status;
+ u16 checksum;
+
+ /*
+ * Read the first word from the EEPROM. If this times out or fails, do
+ * not continue or we could be in for a very long wait while every
+ * EEPROM read fails
+ */
+ status = hw->eeprom.ops.read(hw, 0, &checksum);
+
+ if (status != 0)
+ hw_dbg(hw, "EEPROM read failed\n");
+
+ if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM) ==
+ 0) {
+ checksum = hw->eeprom.ops.calc_checksum(hw);
+
+ /*
+ * Do not use hw->eeprom.ops.write because we do not want to
+ * take the synchronization semaphores twice here.
+ */
+ status = ixgbe_write_eewr_generic(hw, IXGBE_EEPROM_CHECKSUM,
+ checksum);
+
+ if (status == 0)
+ status = ixgbe_update_flash_X540(hw);
+ else
+ status = IXGBE_ERR_SWFW_SYNC;
+ }
+
+ hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM);
+
+ return status;
+}
+
+/**
+ * ixgbe_update_flash_X540 - Instruct HW to copy EEPROM to Flash device
+ * @hw: pointer to hardware structure
+ *
+ * Set FLUP (bit 23) of the EEC register to instruct Hardware to copy
+ * EEPROM from shadow RAM to the flash device.
+ **/
+static s32 ixgbe_update_flash_X540(struct ixgbe_hw *hw)
+{
+ u32 flup;
+ s32 status = IXGBE_ERR_EEPROM;
+
+ status = ixgbe_poll_flash_update_done_X540(hw);
+ if (status == IXGBE_ERR_EEPROM) {
+ hw_dbg(hw, "Flash update time out\n");
+ goto out;
+ }
+
+ flup = IXGBE_READ_REG(hw, IXGBE_EEC) | IXGBE_EEC_FLUP;
+ IXGBE_WRITE_REG(hw, IXGBE_EEC, flup);
+
+ status = ixgbe_poll_flash_update_done_X540(hw);
+ if (status == 0)
+ hw_dbg(hw, "Flash update complete\n");
+ else
+ hw_dbg(hw, "Flash update time out\n");
+
+ if (hw->revision_id == 0) {
+ flup = IXGBE_READ_REG(hw, IXGBE_EEC);
+
+ if (flup & IXGBE_EEC_SEC1VAL) {
+ flup |= IXGBE_EEC_FLUP;
+ IXGBE_WRITE_REG(hw, IXGBE_EEC, flup);
+ }
+
+ status = ixgbe_poll_flash_update_done_X540(hw);
+ if (status == 0)
+ hw_dbg(hw, "Flash update complete\n");
+ else
+ hw_dbg(hw, "Flash update time out\n");
+ }
+out:
+ return status;
+}
+
+/**
+ * ixgbe_poll_flash_update_done_X540 - Poll flash update status
+ * @hw: pointer to hardware structure
+ *
+ * Polls the FLUDONE (bit 26) of the EEC Register to determine when the
+ * flash update is done.
+ **/
+static s32 ixgbe_poll_flash_update_done_X540(struct ixgbe_hw *hw)
+{
+ u32 i;
+ u32 reg;
+ s32 status = IXGBE_ERR_EEPROM;
+
+ for (i = 0; i < IXGBE_FLUDONE_ATTEMPTS; i++) {
+ reg = IXGBE_READ_REG(hw, IXGBE_EEC);
+ if (reg & IXGBE_EEC_FLUDONE) {
+ status = 0;
+ break;
+ }
+ udelay(5);
+ }
+ return status;
+}
+
+/**
+ * ixgbe_acquire_swfw_sync_X540 - Acquire SWFW semaphore
+ * @hw: pointer to hardware structure
+ * @mask: Mask to specify which semaphore to acquire
+ *
+ * Acquires the SWFW semaphore thought the SW_FW_SYNC register for
+ * the specified function (CSR, PHY0, PHY1, NVM, Flash)
+ **/
+s32 ixgbe_acquire_swfw_sync_X540(struct ixgbe_hw *hw, u16 mask)
+{
+ u32 swfw_sync;
+ u32 swmask = mask;
+ u32 fwmask = mask << 5;
+ u32 hwmask = 0;
+ u32 timeout = 200;
+ u32 i;
+ s32 ret_val = 0;
+
+ if (swmask == IXGBE_GSSR_EEP_SM)
+ hwmask = IXGBE_GSSR_FLASH_SM;
+
+ /* SW only mask doesn't have FW bit pair */
+ if (swmask == IXGBE_GSSR_SW_MNG_SM)
+ fwmask = 0;
+
+ for (i = 0; i < timeout; i++) {
+ /*
+ * SW NVM semaphore bit is used for access to all
+ * SW_FW_SYNC bits (not just NVM)
+ */
+ if (ixgbe_get_swfw_sync_semaphore(hw)) {
+ ret_val = IXGBE_ERR_SWFW_SYNC;
+ goto out;
+ }
+
+ swfw_sync = IXGBE_READ_REG(hw, IXGBE_SWFW_SYNC);
+ if (!(swfw_sync & (fwmask | swmask | hwmask))) {
+ swfw_sync |= swmask;
+ IXGBE_WRITE_REG(hw, IXGBE_SWFW_SYNC, swfw_sync);
+ ixgbe_release_swfw_sync_semaphore(hw);
+ msleep(5);
+ goto out;
+ } else {
+ /*
+ * Firmware currently using resource (fwmask), hardware
+ * currently using resource (hwmask), or other software
+ * thread currently using resource (swmask)
+ */
+ ixgbe_release_swfw_sync_semaphore(hw);
+ msleep(5);
+ }
+ }
+
+ /* Failed to get SW only semaphore */
+ if (swmask == IXGBE_GSSR_SW_MNG_SM) {
+ ret_val = IXGBE_ERR_SWFW_SYNC;
+ goto out;
+ }
+
+ /* If the resource is not released by the FW/HW the SW can assume that
+ * the FW/HW malfunctions. In that case the SW should sets the SW bit(s)
+ * of the requested resource(s) while ignoring the corresponding FW/HW
+ * bits in the SW_FW_SYNC register.
+ */
+ swfw_sync = IXGBE_READ_REG(hw, IXGBE_SWFW_SYNC);
+ if (swfw_sync & (fwmask | hwmask)) {
+ if (ixgbe_get_swfw_sync_semaphore(hw)) {
+ ret_val = IXGBE_ERR_SWFW_SYNC;
+ goto out;
+ }
+
+ swfw_sync |= swmask;
+ IXGBE_WRITE_REG(hw, IXGBE_SWFW_SYNC, swfw_sync);
+ ixgbe_release_swfw_sync_semaphore(hw);
+ msleep(5);
+ }
+
+out:
+ return ret_val;
+}
+
+/**
+ * ixgbe_release_swfw_sync_X540 - Release SWFW semaphore
+ * @hw: pointer to hardware structure
+ * @mask: Mask to specify which semaphore to release
+ *
+ * Releases the SWFW semaphore through the SW_FW_SYNC register
+ * for the specified function (CSR, PHY0, PHY1, EVM, Flash)
+ **/
+void ixgbe_release_swfw_sync_X540(struct ixgbe_hw *hw, u16 mask)
+{
+ u32 swfw_sync;
+ u32 swmask = mask;
+
+ ixgbe_get_swfw_sync_semaphore(hw);
+
+ swfw_sync = IXGBE_READ_REG(hw, IXGBE_SWFW_SYNC);
+ swfw_sync &= ~swmask;
+ IXGBE_WRITE_REG(hw, IXGBE_SWFW_SYNC, swfw_sync);
+
+ ixgbe_release_swfw_sync_semaphore(hw);
+ msleep(5);
+}
+
+/**
+ * ixgbe_get_nvm_semaphore - Get hardware semaphore
+ * @hw: pointer to hardware structure
+ *
+ * Sets the hardware semaphores so SW/FW can gain control of shared resources
+ **/
+static s32 ixgbe_get_swfw_sync_semaphore(struct ixgbe_hw *hw)
+{
+ s32 status = IXGBE_ERR_EEPROM;
+ u32 timeout = 2000;
+ u32 i;
+ u32 swsm;
+
+ /* Get SMBI software semaphore between device drivers first */
+ for (i = 0; i < timeout; i++) {
+ /*
+ * If the SMBI bit is 0 when we read it, then the bit will be
+ * set and we have the semaphore
+ */
+ swsm = IXGBE_READ_REG(hw, IXGBE_SWSM);
+ if (!(swsm & IXGBE_SWSM_SMBI)) {
+ status = 0;
+ break;
+ }
+ udelay(50);
+ }
+
+ /* Now get the semaphore between SW/FW through the REGSMP bit */
+ if (status == 0) {
+ for (i = 0; i < timeout; i++) {
+ swsm = IXGBE_READ_REG(hw, IXGBE_SWFW_SYNC);
+ if (!(swsm & IXGBE_SWFW_REGSMP))
+ break;
+
+ udelay(50);
+ }
+
+ /*
+ * Release semaphores and return error if SW NVM semaphore
+ * was not granted because we don't have access to the EEPROM
+ */
+ if (i >= timeout) {
+ hw_dbg(hw, "REGSMP Software NVM semaphore not "
+ "granted.\n");
+ ixgbe_release_swfw_sync_semaphore(hw);
+ status = IXGBE_ERR_EEPROM;
+ }
+ } else {
+ hw_dbg(hw, "Software semaphore SMBI between device drivers "
+ "not granted.\n");
+ }
+
+ return status;
+}
+
+/**
+ * ixgbe_release_nvm_semaphore - Release hardware semaphore
+ * @hw: pointer to hardware structure
+ *
+ * This function clears hardware semaphore bits.
+ **/
+static void ixgbe_release_swfw_sync_semaphore(struct ixgbe_hw *hw)
+{
+ u32 swsm;
+
+ /* Release both semaphores by writing 0 to the bits REGSMP and SMBI */
+
+ swsm = IXGBE_READ_REG(hw, IXGBE_SWSM);
+ swsm &= ~IXGBE_SWSM_SMBI;
+ IXGBE_WRITE_REG(hw, IXGBE_SWSM, swsm);
+
+ swsm = IXGBE_READ_REG(hw, IXGBE_SWFW_SYNC);
+ swsm &= ~IXGBE_SWFW_REGSMP;
+ IXGBE_WRITE_REG(hw, IXGBE_SWFW_SYNC, swsm);
+
+ IXGBE_WRITE_FLUSH(hw);
+}
+
+/**
+ * ixgbe_blink_led_start_X540 - Blink LED based on index.
+ * @hw: pointer to hardware structure
+ * @index: led number to blink
+ *
+ * Devices that implement the version 2 interface:
+ * X540
+ **/
+s32 ixgbe_blink_led_start_X540(struct ixgbe_hw *hw, u32 index)
+{
+ u32 macc_reg;
+ u32 ledctl_reg;
+ ixgbe_link_speed speed;
+ bool link_up;
+
+ /*
+ * Link should be up in order for the blink bit in the LED control
+ * register to work. Force link and speed in the MAC if link is down.
+ * This will be reversed when we stop the blinking.
+ */
+ hw->mac.ops.check_link(hw, &speed, &link_up, false);
+ if (link_up == false) {
+ macc_reg = IXGBE_READ_REG(hw, IXGBE_MACC);
+ macc_reg |= IXGBE_MACC_FLU | IXGBE_MACC_FSV_10G | IXGBE_MACC_FS;
+ IXGBE_WRITE_REG(hw, IXGBE_MACC, macc_reg);
+ }
+ /* Set the LED to LINK_UP + BLINK. */
+ ledctl_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL);
+ ledctl_reg &= ~IXGBE_LED_MODE_MASK(index);
+ ledctl_reg |= IXGBE_LED_BLINK(index);
+ IXGBE_WRITE_REG(hw, IXGBE_LEDCTL, ledctl_reg);
+ IXGBE_WRITE_FLUSH(hw);
+
+ return 0;
+}
+
+/**
+ * ixgbe_blink_led_stop_X540 - Stop blinking LED based on index.
+ * @hw: pointer to hardware structure
+ * @index: led number to stop blinking
+ *
+ * Devices that implement the version 2 interface:
+ * X540
+ **/
+s32 ixgbe_blink_led_stop_X540(struct ixgbe_hw *hw, u32 index)
+{
+ u32 macc_reg;
+ u32 ledctl_reg;
+
+ /* Restore the LED to its default value. */
+ ledctl_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL);
+ ledctl_reg &= ~IXGBE_LED_MODE_MASK(index);
+ ledctl_reg |= IXGBE_LED_LINK_ACTIVE << IXGBE_LED_MODE_SHIFT(index);
+ ledctl_reg &= ~IXGBE_LED_BLINK(index);
+ IXGBE_WRITE_REG(hw, IXGBE_LEDCTL, ledctl_reg);
+
+ /* Unforce link and speed in the MAC. */
+ macc_reg = IXGBE_READ_REG(hw, IXGBE_MACC);
+ macc_reg &= ~(IXGBE_MACC_FLU | IXGBE_MACC_FSV_10G | IXGBE_MACC_FS);
+ IXGBE_WRITE_REG(hw, IXGBE_MACC, macc_reg);
+ IXGBE_WRITE_FLUSH(hw);
+
+ return 0;
+}
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.h
new file mode 100644
index 00000000..77e8952d
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.h
@@ -0,0 +1,58 @@
+/*******************************************************************************
+
+ Intel 10 Gigabit PCI Express Linux driver
+ Copyright(c) 1999 - 2012 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _IXGBE_X540_H_
+#define _IXGBE_X540_H_
+
+#include "ixgbe_type.h"
+
+s32 ixgbe_get_link_capabilities_X540(struct ixgbe_hw *hw,
+ ixgbe_link_speed *speed, bool *autoneg);
+enum ixgbe_media_type ixgbe_get_media_type_X540(struct ixgbe_hw *hw);
+s32 ixgbe_setup_mac_link_X540(struct ixgbe_hw *hw, ixgbe_link_speed speed,
+ bool autoneg, bool link_up_wait_to_complete);
+s32 ixgbe_reset_hw_X540(struct ixgbe_hw *hw);
+s32 ixgbe_start_hw_X540(struct ixgbe_hw *hw);
+u32 ixgbe_get_supported_physical_layer_X540(struct ixgbe_hw *hw);
+
+s32 ixgbe_init_eeprom_params_X540(struct ixgbe_hw *hw);
+s32 ixgbe_read_eerd_X540(struct ixgbe_hw *hw, u16 offset, u16 *data);
+s32 ixgbe_read_eerd_buffer_X540(struct ixgbe_hw *hw, u16 offset, u16 words,
+ u16 *data);
+s32 ixgbe_write_eewr_X540(struct ixgbe_hw *hw, u16 offset, u16 data);
+s32 ixgbe_write_eewr_buffer_X540(struct ixgbe_hw *hw, u16 offset, u16 words,
+ u16 *data);
+s32 ixgbe_update_eeprom_checksum_X540(struct ixgbe_hw *hw);
+s32 ixgbe_validate_eeprom_checksum_X540(struct ixgbe_hw *hw, u16 *checksum_val);
+u16 ixgbe_calc_eeprom_checksum_X540(struct ixgbe_hw *hw);
+
+s32 ixgbe_acquire_swfw_sync_X540(struct ixgbe_hw *hw, u16 mask);
+void ixgbe_release_swfw_sync_X540(struct ixgbe_hw *hw, u16 mask);
+
+s32 ixgbe_blink_led_start_X540(struct ixgbe_hw *hw, u32 index);
+s32 ixgbe_blink_led_stop_X540(struct ixgbe_hw *hw, u32 index);
+#endif /* _IXGBE_X540_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.c
new file mode 100644
index 00000000..5f2523ed
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.c
@@ -0,0 +1,1246 @@
+/*******************************************************************************
+
+ Intel 10 Gigabit PCI Express Linux driver
+ Copyright(c) 1999 - 2012 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#include "ixgbe.h"
+#include "kcompat.h"
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,8) )
+/* From lib/vsprintf.c */
+#include <asm/div64.h>
+
+static int skip_atoi(const char **s)
+{
+ int i=0;
+
+ while (isdigit(**s))
+ i = i*10 + *((*s)++) - '0';
+ return i;
+}
+
+#define _kc_ZEROPAD 1 /* pad with zero */
+#define _kc_SIGN 2 /* unsigned/signed long */
+#define _kc_PLUS 4 /* show plus */
+#define _kc_SPACE 8 /* space if plus */
+#define _kc_LEFT 16 /* left justified */
+#define _kc_SPECIAL 32 /* 0x */
+#define _kc_LARGE 64 /* use 'ABCDEF' instead of 'abcdef' */
+
+static char * number(char * buf, char * end, long long num, int base, int size, int precision, int type)
+{
+ char c,sign,tmp[66];
+ const char *digits;
+ const char small_digits[] = "0123456789abcdefghijklmnopqrstuvwxyz";
+ const char large_digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+ int i;
+
+ digits = (type & _kc_LARGE) ? large_digits : small_digits;
+ if (type & _kc_LEFT)
+ type &= ~_kc_ZEROPAD;
+ if (base < 2 || base > 36)
+ return 0;
+ c = (type & _kc_ZEROPAD) ? '0' : ' ';
+ sign = 0;
+ if (type & _kc_SIGN) {
+ if (num < 0) {
+ sign = '-';
+ num = -num;
+ size--;
+ } else if (type & _kc_PLUS) {
+ sign = '+';
+ size--;
+ } else if (type & _kc_SPACE) {
+ sign = ' ';
+ size--;
+ }
+ }
+ if (type & _kc_SPECIAL) {
+ if (base == 16)
+ size -= 2;
+ else if (base == 8)
+ size--;
+ }
+ i = 0;
+ if (num == 0)
+ tmp[i++]='0';
+ else while (num != 0)
+ tmp[i++] = digits[do_div(num,base)];
+ if (i > precision)
+ precision = i;
+ size -= precision;
+ if (!(type&(_kc_ZEROPAD+_kc_LEFT))) {
+ while(size-->0) {
+ if (buf <= end)
+ *buf = ' ';
+ ++buf;
+ }
+ }
+ if (sign) {
+ if (buf <= end)
+ *buf = sign;
+ ++buf;
+ }
+ if (type & _kc_SPECIAL) {
+ if (base==8) {
+ if (buf <= end)
+ *buf = '0';
+ ++buf;
+ } else if (base==16) {
+ if (buf <= end)
+ *buf = '0';
+ ++buf;
+ if (buf <= end)
+ *buf = digits[33];
+ ++buf;
+ }
+ }
+ if (!(type & _kc_LEFT)) {
+ while (size-- > 0) {
+ if (buf <= end)
+ *buf = c;
+ ++buf;
+ }
+ }
+ while (i < precision--) {
+ if (buf <= end)
+ *buf = '0';
+ ++buf;
+ }
+ while (i-- > 0) {
+ if (buf <= end)
+ *buf = tmp[i];
+ ++buf;
+ }
+ while (size-- > 0) {
+ if (buf <= end)
+ *buf = ' ';
+ ++buf;
+ }
+ return buf;
+}
+
+int _kc_vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
+{
+ int len;
+ unsigned long long num;
+ int i, base;
+ char *str, *end, c;
+ const char *s;
+
+ int flags; /* flags to number() */
+
+ int field_width; /* width of output field */
+ int precision; /* min. # of digits for integers; max
+ number of chars for from string */
+ int qualifier; /* 'h', 'l', or 'L' for integer fields */
+ /* 'z' support added 23/7/1999 S.H. */
+ /* 'z' changed to 'Z' --davidm 1/25/99 */
+
+ str = buf;
+ end = buf + size - 1;
+
+ if (end < buf - 1) {
+ end = ((void *) -1);
+ size = end - buf + 1;
+ }
+
+ for (; *fmt ; ++fmt) {
+ if (*fmt != '%') {
+ if (str <= end)
+ *str = *fmt;
+ ++str;
+ continue;
+ }
+
+ /* process flags */
+ flags = 0;
+ repeat:
+ ++fmt; /* this also skips first '%' */
+ switch (*fmt) {
+ case '-': flags |= _kc_LEFT; goto repeat;
+ case '+': flags |= _kc_PLUS; goto repeat;
+ case ' ': flags |= _kc_SPACE; goto repeat;
+ case '#': flags |= _kc_SPECIAL; goto repeat;
+ case '0': flags |= _kc_ZEROPAD; goto repeat;
+ }
+
+ /* get field width */
+ field_width = -1;
+ if (isdigit(*fmt))
+ field_width = skip_atoi(&fmt);
+ else if (*fmt == '*') {
+ ++fmt;
+ /* it's the next argument */
+ field_width = va_arg(args, int);
+ if (field_width < 0) {
+ field_width = -field_width;
+ flags |= _kc_LEFT;
+ }
+ }
+
+ /* get the precision */
+ precision = -1;
+ if (*fmt == '.') {
+ ++fmt;
+ if (isdigit(*fmt))
+ precision = skip_atoi(&fmt);
+ else if (*fmt == '*') {
+ ++fmt;
+ /* it's the next argument */
+ precision = va_arg(args, int);
+ }
+ if (precision < 0)
+ precision = 0;
+ }
+
+ /* get the conversion qualifier */
+ qualifier = -1;
+ if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || *fmt =='Z') {
+ qualifier = *fmt;
+ ++fmt;
+ }
+
+ /* default base */
+ base = 10;
+
+ switch (*fmt) {
+ case 'c':
+ if (!(flags & _kc_LEFT)) {
+ while (--field_width > 0) {
+ if (str <= end)
+ *str = ' ';
+ ++str;
+ }
+ }
+ c = (unsigned char) va_arg(args, int);
+ if (str <= end)
+ *str = c;
+ ++str;
+ while (--field_width > 0) {
+ if (str <= end)
+ *str = ' ';
+ ++str;
+ }
+ continue;
+
+ case 's':
+ s = va_arg(args, char *);
+ if (!s)
+ s = "<NULL>";
+
+ len = strnlen(s, precision);
+
+ if (!(flags & _kc_LEFT)) {
+ while (len < field_width--) {
+ if (str <= end)
+ *str = ' ';
+ ++str;
+ }
+ }
+ for (i = 0; i < len; ++i) {
+ if (str <= end)
+ *str = *s;
+ ++str; ++s;
+ }
+ while (len < field_width--) {
+ if (str <= end)
+ *str = ' ';
+ ++str;
+ }
+ continue;
+
+ case 'p':
+ if (field_width == -1) {
+ field_width = 2*sizeof(void *);
+ flags |= _kc_ZEROPAD;
+ }
+ str = number(str, end,
+ (unsigned long) va_arg(args, void *),
+ 16, field_width, precision, flags);
+ continue;
+
+
+ case 'n':
+ /* FIXME:
+ * What does C99 say about the overflow case here? */
+ if (qualifier == 'l') {
+ long * ip = va_arg(args, long *);
+ *ip = (str - buf);
+ } else if (qualifier == 'Z') {
+ size_t * ip = va_arg(args, size_t *);
+ *ip = (str - buf);
+ } else {
+ int * ip = va_arg(args, int *);
+ *ip = (str - buf);
+ }
+ continue;
+
+ case '%':
+ if (str <= end)
+ *str = '%';
+ ++str;
+ continue;
+
+ /* integer number formats - set up the flags and "break" */
+ case 'o':
+ base = 8;
+ break;
+
+ case 'X':
+ flags |= _kc_LARGE;
+ case 'x':
+ base = 16;
+ break;
+
+ case 'd':
+ case 'i':
+ flags |= _kc_SIGN;
+ case 'u':
+ break;
+
+ default:
+ if (str <= end)
+ *str = '%';
+ ++str;
+ if (*fmt) {
+ if (str <= end)
+ *str = *fmt;
+ ++str;
+ } else {
+ --fmt;
+ }
+ continue;
+ }
+ if (qualifier == 'L')
+ num = va_arg(args, long long);
+ else if (qualifier == 'l') {
+ num = va_arg(args, unsigned long);
+ if (flags & _kc_SIGN)
+ num = (signed long) num;
+ } else if (qualifier == 'Z') {
+ num = va_arg(args, size_t);
+ } else if (qualifier == 'h') {
+ num = (unsigned short) va_arg(args, int);
+ if (flags & _kc_SIGN)
+ num = (signed short) num;
+ } else {
+ num = va_arg(args, unsigned int);
+ if (flags & _kc_SIGN)
+ num = (signed int) num;
+ }
+ str = number(str, end, num, base,
+ field_width, precision, flags);
+ }
+ if (str <= end)
+ *str = '\0';
+ else if (size > 0)
+ /* don't write out a null byte if the buf size is zero */
+ *end = '\0';
+ /* the trailing null byte doesn't count towards the total
+ * ++str;
+ */
+ return str-buf;
+}
+
+int _kc_snprintf(char * buf, size_t size, const char *fmt, ...)
+{
+ va_list args;
+ int i;
+
+ va_start(args, fmt);
+ i = _kc_vsnprintf(buf,size,fmt,args);
+ va_end(args);
+ return i;
+}
+#endif /* < 2.4.8 */
+
+
+
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) )
+#ifdef CONFIG_PCI_IOV
+int __kc_pci_vfs_assigned(struct pci_dev *dev)
+{
+ unsigned int vfs_assigned = 0;
+#ifdef HAVE_PCI_DEV_FLAGS_ASSIGNED
+ int pos;
+ struct pci_dev *vfdev;
+ unsigned short dev_id;
+
+ /* only search if we are a PF */
+ if (!dev->is_physfn)
+ return 0;
+
+ /* find SR-IOV capability */
+ pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV);
+ if (!pos)
+ return 0;
+
+ /*
+ * * determine the device ID for the VFs, the vendor ID will be the
+ * * same as the PF so there is no need to check for that one
+ * */
+ pci_read_config_word(dev, pos + PCI_SRIOV_VF_DID, &dev_id);
+
+ /* loop through all the VFs to see if we own any that are assigned */
+ vfdev = pci_get_device(dev->vendor, dev_id, NULL);
+ while (vfdev) {
+ /*
+ * * It is considered assigned if it is a virtual function with
+ * * our dev as the physical function and the assigned bit is set
+ * */
+ if (vfdev->is_virtfn && (vfdev->physfn == dev) &&
+ (vfdev->dev_flags & PCI_DEV_FLAGS_ASSIGNED))
+ vfs_assigned++;
+
+ vfdev = pci_get_device(dev->vendor, dev_id, vfdev);
+ }
+
+#endif /* HAVE_PCI_DEV_FLAGS_ASSIGNED */
+ return vfs_assigned;
+}
+
+#endif /* CONFIG_PCI_IOV */
+#endif /* 3.10.0 */
+
+
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,13) )
+
+/**************************************/
+/* PCI DMA MAPPING */
+
+#if defined(CONFIG_HIGHMEM)
+
+#ifndef PCI_DRAM_OFFSET
+#define PCI_DRAM_OFFSET 0
+#endif
+
+u64
+_kc_pci_map_page(struct pci_dev *dev, struct page *page, unsigned long offset,
+ size_t size, int direction)
+{
+ return ((u64) (page - mem_map) << PAGE_SHIFT) + offset +
+ PCI_DRAM_OFFSET;
+}
+
+#else /* CONFIG_HIGHMEM */
+
+u64
+_kc_pci_map_page(struct pci_dev *dev, struct page *page, unsigned long offset,
+ size_t size, int direction)
+{
+ return pci_map_single(dev, (void *)page_address(page) + offset, size,
+ direction);
+}
+
+#endif /* CONFIG_HIGHMEM */
+
+void
+_kc_pci_unmap_page(struct pci_dev *dev, u64 dma_addr, size_t size,
+ int direction)
+{
+ return pci_unmap_single(dev, dma_addr, size, direction);
+}
+
+#endif /* 2.4.13 => 2.4.3 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,3) )
+
+/**************************************/
+/* PCI DRIVER API */
+
+int
+_kc_pci_set_dma_mask(struct pci_dev *dev, dma_addr_t mask)
+{
+ if (!pci_dma_supported(dev, mask))
+ return -EIO;
+ dev->dma_mask = mask;
+ return 0;
+}
+
+int
+_kc_pci_request_regions(struct pci_dev *dev, char *res_name)
+{
+ int i;
+
+ for (i = 0; i < 6; i++) {
+ if (pci_resource_len(dev, i) == 0)
+ continue;
+
+ if (pci_resource_flags(dev, i) & IORESOURCE_IO) {
+ if (!request_region(pci_resource_start(dev, i), pci_resource_len(dev, i), res_name)) {
+ pci_release_regions(dev);
+ return -EBUSY;
+ }
+ } else if (pci_resource_flags(dev, i) & IORESOURCE_MEM) {
+ if (!request_mem_region(pci_resource_start(dev, i), pci_resource_len(dev, i), res_name)) {
+ pci_release_regions(dev);
+ return -EBUSY;
+ }
+ }
+ }
+ return 0;
+}
+
+void
+_kc_pci_release_regions(struct pci_dev *dev)
+{
+ int i;
+
+ for (i = 0; i < 6; i++) {
+ if (pci_resource_len(dev, i) == 0)
+ continue;
+
+ if (pci_resource_flags(dev, i) & IORESOURCE_IO)
+ release_region(pci_resource_start(dev, i), pci_resource_len(dev, i));
+
+ else if (pci_resource_flags(dev, i) & IORESOURCE_MEM)
+ release_mem_region(pci_resource_start(dev, i), pci_resource_len(dev, i));
+ }
+}
+
+/**************************************/
+/* NETWORK DRIVER API */
+
+struct net_device *
+_kc_alloc_etherdev(int sizeof_priv)
+{
+ struct net_device *dev;
+ int alloc_size;
+
+ alloc_size = sizeof(*dev) + sizeof_priv + IFNAMSIZ + 31;
+ dev = kzalloc(alloc_size, GFP_KERNEL);
+ if (!dev)
+ return NULL;
+
+ if (sizeof_priv)
+ dev->priv = (void *) (((unsigned long)(dev + 1) + 31) & ~31);
+ dev->name[0] = '\0';
+ ether_setup(dev);
+
+ return dev;
+}
+
+int
+_kc_is_valid_ether_addr(u8 *addr)
+{
+ const char zaddr[6] = { 0, };
+
+ return !(addr[0] & 1) && memcmp(addr, zaddr, 6);
+}
+
+#endif /* 2.4.3 => 2.4.0 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,6) )
+
+int
+_kc_pci_set_power_state(struct pci_dev *dev, int state)
+{
+ return 0;
+}
+
+int
+_kc_pci_enable_wake(struct pci_dev *pdev, u32 state, int enable)
+{
+ return 0;
+}
+
+#endif /* 2.4.6 => 2.4.3 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) )
+void _kc_skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page,
+ int off, int size)
+{
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+ frag->page = page;
+ frag->page_offset = off;
+ frag->size = size;
+ skb_shinfo(skb)->nr_frags = i + 1;
+}
+
+/*
+ * Original Copyright:
+ * find_next_bit.c: fallback find next bit implementation
+ *
+ * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+/**
+ * find_next_bit - find the next set bit in a memory region
+ * @addr: The address to base the search on
+ * @offset: The bitnumber to start searching at
+ * @size: The maximum size to search
+ */
+unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
+ unsigned long offset)
+{
+ const unsigned long *p = addr + BITOP_WORD(offset);
+ unsigned long result = offset & ~(BITS_PER_LONG-1);
+ unsigned long tmp;
+
+ if (offset >= size)
+ return size;
+ size -= result;
+ offset %= BITS_PER_LONG;
+ if (offset) {
+ tmp = *(p++);
+ tmp &= (~0UL << offset);
+ if (size < BITS_PER_LONG)
+ goto found_first;
+ if (tmp)
+ goto found_middle;
+ size -= BITS_PER_LONG;
+ result += BITS_PER_LONG;
+ }
+ while (size & ~(BITS_PER_LONG-1)) {
+ if ((tmp = *(p++)))
+ goto found_middle;
+ result += BITS_PER_LONG;
+ size -= BITS_PER_LONG;
+ }
+ if (!size)
+ return result;
+ tmp = *p;
+
+found_first:
+ tmp &= (~0UL >> (BITS_PER_LONG - size));
+ if (tmp == 0UL) /* Are any bits set? */
+ return result + size; /* Nope. */
+found_middle:
+ return result + ffs(tmp);
+}
+
+size_t _kc_strlcpy(char *dest, const char *src, size_t size)
+{
+ size_t ret = strlen(src);
+
+ if (size) {
+ size_t len = (ret >= size) ? size - 1 : ret;
+ memcpy(dest, src, len);
+ dest[len] = '\0';
+ }
+ return ret;
+}
+
+#endif /* 2.6.0 => 2.4.6 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) )
+int _kc_scnprintf(char * buf, size_t size, const char *fmt, ...)
+{
+ va_list args;
+ int i;
+
+ va_start(args, fmt);
+ i = vsnprintf(buf, size, fmt, args);
+ va_end(args);
+ return (i >= size) ? (size - 1) : i;
+}
+#endif /* < 2.6.4 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,10) )
+DECLARE_BITMAP(_kcompat_node_online_map, MAX_NUMNODES) = {1};
+#endif /* < 2.6.10 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,13) )
+char *_kc_kstrdup(const char *s, unsigned int gfp)
+{
+ size_t len;
+ char *buf;
+
+ if (!s)
+ return NULL;
+
+ len = strlen(s) + 1;
+ buf = kmalloc(len, gfp);
+ if (buf)
+ memcpy(buf, s, len);
+ return buf;
+}
+#endif /* < 2.6.13 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14) )
+void *_kc_kzalloc(size_t size, int flags)
+{
+ void *ret = kmalloc(size, flags);
+ if (ret)
+ memset(ret, 0, size);
+ return ret;
+}
+#endif /* <= 2.6.13 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) )
+int _kc_skb_pad(struct sk_buff *skb, int pad)
+{
+ int ntail;
+
+ /* If the skbuff is non linear tailroom is always zero.. */
+ if(!skb_cloned(skb) && skb_tailroom(skb) >= pad) {
+ memset(skb->data+skb->len, 0, pad);
+ return 0;
+ }
+
+ ntail = skb->data_len + pad - (skb->end - skb->tail);
+ if (likely(skb_cloned(skb) || ntail > 0)) {
+ if (pskb_expand_head(skb, 0, ntail, GFP_ATOMIC));
+ goto free_skb;
+ }
+
+#ifdef MAX_SKB_FRAGS
+ if (skb_is_nonlinear(skb) &&
+ !__pskb_pull_tail(skb, skb->data_len))
+ goto free_skb;
+
+#endif
+ memset(skb->data + skb->len, 0, pad);
+ return 0;
+
+free_skb:
+ kfree_skb(skb);
+ return -ENOMEM;
+}
+
+#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,4)))
+int _kc_pci_save_state(struct pci_dev *pdev)
+{
+ struct adapter_struct *adapter = pci_get_drvdata(pdev);
+ int size = PCI_CONFIG_SPACE_LEN, i;
+ u16 pcie_cap_offset, pcie_link_status;
+
+#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) )
+ /* no ->dev for 2.4 kernels */
+ WARN_ON(pdev->dev.driver_data == NULL);
+#endif
+ pcie_cap_offset = pci_find_capability(pdev, PCI_CAP_ID_EXP);
+ if (pcie_cap_offset) {
+ if (!pci_read_config_word(pdev,
+ pcie_cap_offset + PCIE_LINK_STATUS,
+ &pcie_link_status))
+ size = PCIE_CONFIG_SPACE_LEN;
+ }
+ pci_config_space_ich8lan();
+#ifdef HAVE_PCI_ERS
+ if (adapter->config_space == NULL)
+#else
+ WARN_ON(adapter->config_space != NULL);
+#endif
+ adapter->config_space = kmalloc(size, GFP_KERNEL);
+ if (!adapter->config_space) {
+ printk(KERN_ERR "Out of memory in pci_save_state\n");
+ return -ENOMEM;
+ }
+ for (i = 0; i < (size / 4); i++)
+ pci_read_config_dword(pdev, i * 4, &adapter->config_space[i]);
+ return 0;
+}
+
+void _kc_pci_restore_state(struct pci_dev *pdev)
+{
+ struct adapter_struct *adapter = pci_get_drvdata(pdev);
+ int size = PCI_CONFIG_SPACE_LEN, i;
+ u16 pcie_cap_offset;
+ u16 pcie_link_status;
+
+ if (adapter->config_space != NULL) {
+ pcie_cap_offset = pci_find_capability(pdev, PCI_CAP_ID_EXP);
+ if (pcie_cap_offset &&
+ !pci_read_config_word(pdev,
+ pcie_cap_offset + PCIE_LINK_STATUS,
+ &pcie_link_status))
+ size = PCIE_CONFIG_SPACE_LEN;
+
+ pci_config_space_ich8lan();
+ for (i = 0; i < (size / 4); i++)
+ pci_write_config_dword(pdev, i * 4, adapter->config_space[i]);
+#ifndef HAVE_PCI_ERS
+ kfree(adapter->config_space);
+ adapter->config_space = NULL;
+#endif
+ }
+}
+#endif /* !(RHEL_RELEASE_CODE >= RHEL 5.4) */
+
+#ifdef HAVE_PCI_ERS
+void _kc_free_netdev(struct net_device *netdev)
+{
+ struct adapter_struct *adapter = netdev_priv(netdev);
+
+ if (adapter->config_space != NULL)
+ kfree(adapter->config_space);
+#ifdef CONFIG_SYSFS
+ if (netdev->reg_state == NETREG_UNINITIALIZED) {
+ kfree((char *)netdev - netdev->padded);
+ } else {
+ BUG_ON(netdev->reg_state != NETREG_UNREGISTERED);
+ netdev->reg_state = NETREG_RELEASED;
+ class_device_put(&netdev->class_dev);
+ }
+#else
+ kfree((char *)netdev - netdev->padded);
+#endif
+}
+#endif
+
+void *_kc_kmemdup(const void *src, size_t len, unsigned gfp)
+{
+ void *p;
+
+ p = kzalloc(len, gfp);
+ if (p)
+ memcpy(p, src, len);
+ return p;
+}
+#endif /* <= 2.6.19 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) )
+/* hexdump code taken from lib/hexdump.c */
+static void _kc_hex_dump_to_buffer(const void *buf, size_t len, int rowsize,
+ int groupsize, unsigned char *linebuf,
+ size_t linebuflen, bool ascii)
+{
+ const u8 *ptr = buf;
+ u8 ch;
+ int j, lx = 0;
+ int ascii_column;
+
+ if (rowsize != 16 && rowsize != 32)
+ rowsize = 16;
+
+ if (!len)
+ goto nil;
+ if (len > rowsize) /* limit to one line at a time */
+ len = rowsize;
+ if ((len % groupsize) != 0) /* no mixed size output */
+ groupsize = 1;
+
+ switch (groupsize) {
+ case 8: {
+ const u64 *ptr8 = buf;
+ int ngroups = len / groupsize;
+
+ for (j = 0; j < ngroups; j++)
+ lx += scnprintf((char *)(linebuf + lx), linebuflen - lx,
+ "%s%16.16llx", j ? " " : "",
+ (unsigned long long)*(ptr8 + j));
+ ascii_column = 17 * ngroups + 2;
+ break;
+ }
+
+ case 4: {
+ const u32 *ptr4 = buf;
+ int ngroups = len / groupsize;
+
+ for (j = 0; j < ngroups; j++)
+ lx += scnprintf((char *)(linebuf + lx), linebuflen - lx,
+ "%s%8.8x", j ? " " : "", *(ptr4 + j));
+ ascii_column = 9 * ngroups + 2;
+ break;
+ }
+
+ case 2: {
+ const u16 *ptr2 = buf;
+ int ngroups = len / groupsize;
+
+ for (j = 0; j < ngroups; j++)
+ lx += scnprintf((char *)(linebuf + lx), linebuflen - lx,
+ "%s%4.4x", j ? " " : "", *(ptr2 + j));
+ ascii_column = 5 * ngroups + 2;
+ break;
+ }
+
+ default:
+ for (j = 0; (j < len) && (lx + 3) <= linebuflen; j++) {
+ ch = ptr[j];
+ linebuf[lx++] = hex_asc(ch >> 4);
+ linebuf[lx++] = hex_asc(ch & 0x0f);
+ linebuf[lx++] = ' ';
+ }
+ if (j)
+ lx--;
+
+ ascii_column = 3 * rowsize + 2;
+ break;
+ }
+ if (!ascii)
+ goto nil;
+
+ while (lx < (linebuflen - 1) && lx < (ascii_column - 1))
+ linebuf[lx++] = ' ';
+ for (j = 0; (j < len) && (lx + 2) < linebuflen; j++)
+ linebuf[lx++] = (isascii(ptr[j]) && isprint(ptr[j])) ? ptr[j]
+ : '.';
+nil:
+ linebuf[lx++] = '\0';
+}
+
+void _kc_print_hex_dump(const char *level,
+ const char *prefix_str, int prefix_type,
+ int rowsize, int groupsize,
+ const void *buf, size_t len, bool ascii)
+{
+ const u8 *ptr = buf;
+ int i, linelen, remaining = len;
+ unsigned char linebuf[200];
+
+ if (rowsize != 16 && rowsize != 32)
+ rowsize = 16;
+
+ for (i = 0; i < len; i += rowsize) {
+ linelen = min(remaining, rowsize);
+ remaining -= rowsize;
+ _kc_hex_dump_to_buffer(ptr + i, linelen, rowsize, groupsize,
+ linebuf, sizeof(linebuf), ascii);
+
+ switch (prefix_type) {
+ case DUMP_PREFIX_ADDRESS:
+ printk("%s%s%*p: %s\n", level, prefix_str,
+ (int)(2 * sizeof(void *)), ptr + i, linebuf);
+ break;
+ case DUMP_PREFIX_OFFSET:
+ printk("%s%s%.8x: %s\n", level, prefix_str, i, linebuf);
+ break;
+ default:
+ printk("%s%s%s\n", level, prefix_str, linebuf);
+ break;
+ }
+ }
+}
+#endif /* < 2.6.22 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) )
+int ixgbe_dcb_netlink_register(void)
+{
+ return 0;
+}
+
+int ixgbe_dcb_netlink_unregister(void)
+{
+ return 0;
+}
+
+int ixgbe_copy_dcb_cfg(struct ixgbe_adapter *adapter, int tc_max)
+{
+ return 0;
+}
+#endif /* < 2.6.23 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) )
+#ifdef NAPI
+struct net_device *napi_to_poll_dev(struct napi_struct *napi)
+{
+ struct adapter_q_vector *q_vector = container_of(napi,
+ struct adapter_q_vector,
+ napi);
+ return &q_vector->poll_dev;
+}
+
+int __kc_adapter_clean(struct net_device *netdev, int *budget)
+{
+ int work_done;
+ int work_to_do = min(*budget, netdev->quota);
+ /* kcompat.h netif_napi_add puts napi struct in "fake netdev->priv" */
+ struct napi_struct *napi = netdev->priv;
+ work_done = napi->poll(napi, work_to_do);
+ *budget -= work_done;
+ netdev->quota -= work_done;
+ return (work_done >= work_to_do) ? 1 : 0;
+}
+#endif /* NAPI */
+#endif /* <= 2.6.24 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26) )
+void _kc_pci_disable_link_state(struct pci_dev *pdev, int state)
+{
+ struct pci_dev *parent = pdev->bus->self;
+ u16 link_state;
+ int pos;
+
+ if (!parent)
+ return;
+
+ pos = pci_find_capability(parent, PCI_CAP_ID_EXP);
+ if (pos) {
+ pci_read_config_word(parent, pos + PCI_EXP_LNKCTL, &link_state);
+ link_state &= ~state;
+ pci_write_config_word(parent, pos + PCI_EXP_LNKCTL, link_state);
+ }
+}
+#endif /* < 2.6.26 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27) )
+#ifdef HAVE_TX_MQ
+void _kc_netif_tx_stop_all_queues(struct net_device *netdev)
+{
+ struct adapter_struct *adapter = netdev_priv(netdev);
+ int i;
+
+ netif_stop_queue(netdev);
+ if (netif_is_multiqueue(netdev))
+ for (i = 0; i < adapter->num_tx_queues; i++)
+ netif_stop_subqueue(netdev, i);
+}
+void _kc_netif_tx_wake_all_queues(struct net_device *netdev)
+{
+ struct adapter_struct *adapter = netdev_priv(netdev);
+ int i;
+
+ netif_wake_queue(netdev);
+ if (netif_is_multiqueue(netdev))
+ for (i = 0; i < adapter->num_tx_queues; i++)
+ netif_wake_subqueue(netdev, i);
+}
+void _kc_netif_tx_start_all_queues(struct net_device *netdev)
+{
+ struct adapter_struct *adapter = netdev_priv(netdev);
+ int i;
+
+ netif_start_queue(netdev);
+ if (netif_is_multiqueue(netdev))
+ for (i = 0; i < adapter->num_tx_queues; i++)
+ netif_start_subqueue(netdev, i);
+}
+#endif /* HAVE_TX_MQ */
+
+#ifndef __WARN_printf
+void __kc_warn_slowpath(const char *file, int line, const char *fmt, ...)
+{
+ va_list args;
+
+ printk(KERN_WARNING "------------[ cut here ]------------\n");
+ printk(KERN_WARNING "WARNING: at %s:%d %s()\n", file, line);
+ va_start(args, fmt);
+ vprintk(fmt, args);
+ va_end(args);
+
+ dump_stack();
+}
+#endif /* __WARN_printf */
+#endif /* < 2.6.27 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) )
+
+int
+_kc_pci_prepare_to_sleep(struct pci_dev *dev)
+{
+ pci_power_t target_state;
+ int error;
+
+ target_state = pci_choose_state(dev, PMSG_SUSPEND);
+
+ pci_enable_wake(dev, target_state, true);
+
+ error = pci_set_power_state(dev, target_state);
+
+ if (error)
+ pci_enable_wake(dev, target_state, false);
+
+ return error;
+}
+
+int
+_kc_pci_wake_from_d3(struct pci_dev *dev, bool enable)
+{
+ int err;
+
+ err = pci_enable_wake(dev, PCI_D3cold, enable);
+ if (err)
+ goto out;
+
+ err = pci_enable_wake(dev, PCI_D3hot, enable);
+
+out:
+ return err;
+}
+#endif /* < 2.6.28 */
+
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0) )
+void _kc_skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page,
+ int off, int size)
+{
+ skb_fill_page_desc(skb, i, page, off, size);
+ skb->len += size;
+ skb->data_len += size;
+ skb->truesize += size;
+}
+#endif /* < 3.4.0 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,30) )
+#ifdef HAVE_NETDEV_SELECT_QUEUE
+#include <net/ip.h>
+static u32 _kc_simple_tx_hashrnd;
+static u32 _kc_simple_tx_hashrnd_initialized;
+
+u16 _kc_skb_tx_hash(struct net_device *dev, struct sk_buff *skb)
+{
+ u32 addr1, addr2, ports;
+ u32 hash, ihl;
+ u8 ip_proto = 0;
+
+ if (unlikely(!_kc_simple_tx_hashrnd_initialized)) {
+ get_random_bytes(&_kc_simple_tx_hashrnd, 4);
+ _kc_simple_tx_hashrnd_initialized = 1;
+ }
+
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)))
+ ip_proto = ip_hdr(skb)->protocol;
+ addr1 = ip_hdr(skb)->saddr;
+ addr2 = ip_hdr(skb)->daddr;
+ ihl = ip_hdr(skb)->ihl;
+ break;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ case htons(ETH_P_IPV6):
+ ip_proto = ipv6_hdr(skb)->nexthdr;
+ addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3];
+ addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3];
+ ihl = (40 >> 2);
+ break;
+#endif
+ default:
+ return 0;
+ }
+
+
+ switch (ip_proto) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ case IPPROTO_DCCP:
+ case IPPROTO_ESP:
+ case IPPROTO_AH:
+ case IPPROTO_SCTP:
+ case IPPROTO_UDPLITE:
+ ports = *((u32 *) (skb_network_header(skb) + (ihl * 4)));
+ break;
+
+ default:
+ ports = 0;
+ break;
+ }
+
+ hash = jhash_3words(addr1, addr2, ports, _kc_simple_tx_hashrnd);
+
+ return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
+}
+#endif /* HAVE_NETDEV_SELECT_QUEUE */
+#endif /* < 2.6.30 */
+
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35) )
+#ifdef HAVE_TX_MQ
+#ifndef CONFIG_NETDEVICES_MULTIQUEUE
+void _kc_netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
+{
+ unsigned int real_num = dev->real_num_tx_queues;
+ struct Qdisc *qdisc;
+ int i;
+
+ if (unlikely(txq > dev->num_tx_queues))
+ ;
+ else if (txq > real_num)
+ dev->real_num_tx_queues = txq;
+ else if ( txq < real_num) {
+ dev->real_num_tx_queues = txq;
+ for (i = txq; i < dev->num_tx_queues; i++) {
+ qdisc = netdev_get_tx_queue(dev, i)->qdisc;
+ if (qdisc) {
+ spin_lock_bh(qdisc_lock(qdisc));
+ qdisc_reset(qdisc);
+ spin_unlock_bh(qdisc_lock(qdisc));
+ }
+ }
+ }
+}
+#endif /* CONFIG_NETDEVICES_MULTIQUEUE */
+#endif /* HAVE_TX_MQ */
+#endif /* < 2.6.35 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36) )
+static const u32 _kc_flags_dup_features =
+ (ETH_FLAG_LRO | ETH_FLAG_NTUPLE | ETH_FLAG_RXHASH);
+
+u32 _kc_ethtool_op_get_flags(struct net_device *dev)
+{
+ return dev->features & _kc_flags_dup_features;
+}
+
+int _kc_ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported)
+{
+ if (data & ~supported)
+ return -EINVAL;
+
+ dev->features = ((dev->features & ~_kc_flags_dup_features) |
+ (data & _kc_flags_dup_features));
+ return 0;
+}
+#endif /* < 2.6.36 */
+
+/******************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39) )
+#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(6,0)))
+u8 _kc_netdev_get_num_tc(struct net_device *dev)
+{
+ struct adapter_struct *kc_adapter = netdev_priv(dev);
+ if (kc_adapter->flags & IXGBE_FLAG_DCB_ENABLED)
+ return kc_adapter->tc;
+ else
+ return 0;
+}
+
+u8 _kc_netdev_get_prio_tc_map(struct net_device *dev, u8 up)
+{
+ struct adapter_struct *kc_adapter = netdev_priv(dev);
+ int tc;
+ u8 map;
+
+ for (tc = 0; tc < IXGBE_DCB_MAX_TRAFFIC_CLASS; tc++) {
+ map = kc_adapter->dcb_cfg.tc_config[tc].path[0].up_to_tc_bitmap;
+
+ if (map & (1 << up))
+ return tc;
+ }
+
+ return 0;
+}
+#endif /* !(RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(6,0)) */
+#endif /* < 2.6.39 */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.h
new file mode 100644
index 00000000..bf27579b
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.h
@@ -0,0 +1,3143 @@
+/*******************************************************************************
+
+ Intel 10 Gigabit PCI Express Linux driver
+ Copyright(c) 1999 - 2012 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _KCOMPAT_H_
+#define _KCOMPAT_H_
+
+#ifndef LINUX_VERSION_CODE
+#include <linux/version.h>
+#else
+#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c))
+#endif
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/ioport.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/delay.h>
+#include <linux/sched.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <linux/mii.h>
+#include <linux/vmalloc.h>
+#include <asm/io.h>
+#include <linux/ethtool.h>
+#include <linux/if_vlan.h>
+
+/* NAPI enable/disable flags here */
+/* enable NAPI for ixgbe by default */
+#undef CONFIG_IXGBE_NAPI
+#define CONFIG_IXGBE_NAPI
+#define NAPI
+#ifdef CONFIG_IXGBE_NAPI
+#undef NAPI
+#define NAPI
+#endif /* CONFIG_IXGBE_NAPI */
+#ifdef IXGBE_NAPI
+#undef NAPI
+#define NAPI
+#endif /* IXGBE_NAPI */
+#ifdef IXGBE_NO_NAPI
+#undef NAPI
+#endif /* IXGBE_NO_NAPI */
+
+#define adapter_struct ixgbe_adapter
+#define adapter_q_vector ixgbe_q_vector
+
+/* and finally set defines so that the code sees the changes */
+#ifdef NAPI
+#ifndef CONFIG_IXGBE_NAPI
+#define CONFIG_IXGBE_NAPI
+#endif
+#else
+#undef CONFIG_IXGBE_NAPI
+#endif /* NAPI */
+
+/* packet split disable/enable */
+#ifdef DISABLE_PACKET_SPLIT
+#ifndef CONFIG_IXGBE_DISABLE_PACKET_SPLIT
+#define CONFIG_IXGBE_DISABLE_PACKET_SPLIT
+#endif
+#endif /* DISABLE_PACKET_SPLIT */
+
+/* MSI compatibility code for all kernels and drivers */
+#ifdef DISABLE_PCI_MSI
+#undef CONFIG_PCI_MSI
+#endif
+#ifndef CONFIG_PCI_MSI
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,8) )
+struct msix_entry {
+ u16 vector; /* kernel uses to write allocated vector */
+ u16 entry; /* driver uses to specify entry, OS writes */
+};
+#endif
+#undef pci_enable_msi
+#define pci_enable_msi(a) -ENOTSUPP
+#undef pci_disable_msi
+#define pci_disable_msi(a) do {} while (0)
+#undef pci_enable_msix
+#define pci_enable_msix(a, b, c) -ENOTSUPP
+#undef pci_disable_msix
+#define pci_disable_msix(a) do {} while (0)
+#define msi_remove_pci_irq_vectors(a) do {} while (0)
+#endif /* CONFIG_PCI_MSI */
+#ifdef DISABLE_PM
+#undef CONFIG_PM
+#endif
+
+#ifdef DISABLE_NET_POLL_CONTROLLER
+#undef CONFIG_NET_POLL_CONTROLLER
+#endif
+
+#ifndef PMSG_SUSPEND
+#define PMSG_SUSPEND 3
+#endif
+
+/* generic boolean compatibility */
+#undef TRUE
+#undef FALSE
+#define TRUE true
+#define FALSE false
+#ifdef GCC_VERSION
+#if ( GCC_VERSION < 3000 )
+#define _Bool char
+#endif
+#else
+#define _Bool char
+#endif
+
+/* kernels less than 2.4.14 don't have this */
+#ifndef ETH_P_8021Q
+#define ETH_P_8021Q 0x8100
+#endif
+
+#ifndef module_param
+#define module_param(v,t,p) MODULE_PARM(v, "i");
+#endif
+
+#ifndef DMA_64BIT_MASK
+#define DMA_64BIT_MASK 0xffffffffffffffffULL
+#endif
+
+#ifndef DMA_32BIT_MASK
+#define DMA_32BIT_MASK 0x00000000ffffffffULL
+#endif
+
+#ifndef PCI_CAP_ID_EXP
+#define PCI_CAP_ID_EXP 0x10
+#endif
+
+#ifndef PCIE_LINK_STATE_L0S
+#define PCIE_LINK_STATE_L0S 1
+#endif
+#ifndef PCIE_LINK_STATE_L1
+#define PCIE_LINK_STATE_L1 2
+#endif
+
+#ifndef mmiowb
+#ifdef CONFIG_IA64
+#define mmiowb() asm volatile ("mf.a" ::: "memory")
+#else
+#define mmiowb()
+#endif
+#endif
+
+#ifndef SET_NETDEV_DEV
+#define SET_NETDEV_DEV(net, pdev)
+#endif
+
+#if !defined(HAVE_FREE_NETDEV) && ( LINUX_VERSION_CODE < KERNEL_VERSION(3,1,0) )
+#define free_netdev(x) kfree(x)
+#endif
+
+#ifdef HAVE_POLL_CONTROLLER
+#define CONFIG_NET_POLL_CONTROLLER
+#endif
+
+#ifndef SKB_DATAREF_SHIFT
+/* if we do not have the infrastructure to detect if skb_header is cloned
+ just return false in all cases */
+#define skb_header_cloned(x) 0
+#endif
+
+#ifndef NETIF_F_GSO
+#define gso_size tso_size
+#define gso_segs tso_segs
+#endif
+
+#ifndef NETIF_F_GRO
+#define vlan_gro_receive(_napi, _vlgrp, _vlan, _skb) \
+ vlan_hwaccel_receive_skb(_skb, _vlgrp, _vlan)
+#define napi_gro_receive(_napi, _skb) netif_receive_skb(_skb)
+#endif
+
+#ifndef NETIF_F_SCTP_CSUM
+#define NETIF_F_SCTP_CSUM 0
+#endif
+
+#ifndef NETIF_F_LRO
+#define NETIF_F_LRO (1 << 15)
+#endif
+
+#ifndef NETIF_F_NTUPLE
+#define NETIF_F_NTUPLE (1 << 27)
+#endif
+
+#ifndef IPPROTO_SCTP
+#define IPPROTO_SCTP 132
+#endif
+
+#ifndef CHECKSUM_PARTIAL
+#define CHECKSUM_PARTIAL CHECKSUM_HW
+#define CHECKSUM_COMPLETE CHECKSUM_HW
+#endif
+
+#ifndef __read_mostly
+#define __read_mostly
+#endif
+
+#ifndef MII_RESV1
+#define MII_RESV1 0x17 /* Reserved... */
+#endif
+
+#ifndef unlikely
+#define unlikely(_x) _x
+#define likely(_x) _x
+#endif
+
+#ifndef WARN_ON
+#define WARN_ON(x)
+#endif
+
+#ifndef PCI_DEVICE
+#define PCI_DEVICE(vend,dev) \
+ .vendor = (vend), .device = (dev), \
+ .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID
+#endif
+
+#ifndef node_online
+#define node_online(node) ((node) == 0)
+#endif
+
+#ifndef num_online_cpus
+#define num_online_cpus() smp_num_cpus
+#endif
+
+#ifndef cpu_online
+#define cpu_online(cpuid) test_bit((cpuid), &cpu_online_map)
+#endif
+
+#ifndef _LINUX_RANDOM_H
+#include <linux/random.h>
+#endif
+
+#ifndef DECLARE_BITMAP
+#ifndef BITS_TO_LONGS
+#define BITS_TO_LONGS(bits) (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG)
+#endif
+#define DECLARE_BITMAP(name,bits) long name[BITS_TO_LONGS(bits)]
+#endif
+
+#ifndef VLAN_HLEN
+#define VLAN_HLEN 4
+#endif
+
+#ifndef VLAN_ETH_HLEN
+#define VLAN_ETH_HLEN 18
+#endif
+
+#ifndef VLAN_ETH_FRAME_LEN
+#define VLAN_ETH_FRAME_LEN 1518
+#endif
+
+#if !defined(IXGBE_DCA) && !defined(IGB_DCA)
+#define dca_get_tag(b) 0
+#define dca_add_requester(a) -1
+#define dca_remove_requester(b) do { } while(0)
+#define DCA_PROVIDER_ADD 0x0001
+#define DCA_PROVIDER_REMOVE 0x0002
+#endif
+
+#ifndef DCA_GET_TAG_TWO_ARGS
+#define dca3_get_tag(a,b) dca_get_tag(b)
+#endif
+
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+#if defined(__i386__) || defined(__x86_64__)
+#define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+#endif
+#endif
+
+/* taken from 2.6.24 definition in linux/kernel.h */
+#ifndef IS_ALIGNED
+#define IS_ALIGNED(x,a) (((x) % ((typeof(x))(a))) == 0)
+#endif
+
+#ifndef NETIF_F_HW_VLAN_TX
+struct _kc_vlan_ethhdr {
+ unsigned char h_dest[ETH_ALEN];
+ unsigned char h_source[ETH_ALEN];
+ __be16 h_vlan_proto;
+ __be16 h_vlan_TCI;
+ __be16 h_vlan_encapsulated_proto;
+};
+#define vlan_ethhdr _kc_vlan_ethhdr
+struct _kc_vlan_hdr {
+ __be16 h_vlan_TCI;
+ __be16 h_vlan_encapsulated_proto;
+};
+#define vlan_hdr _kc_vlan_hdr
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) )
+#define vlan_tx_tag_present(_skb) 0
+#define vlan_tx_tag_get(_skb) 0
+#endif
+#endif
+
+#ifndef VLAN_PRIO_SHIFT
+#define VLAN_PRIO_SHIFT 13
+#endif
+
+
+#ifndef __GFP_COLD
+#define __GFP_COLD 0
+#endif
+
+/*****************************************************************************/
+/* Installations with ethtool version without eeprom, adapter id, or statistics
+ * support */
+
+#ifndef ETH_GSTRING_LEN
+#define ETH_GSTRING_LEN 32
+#endif
+
+#ifndef ETHTOOL_GSTATS
+#define ETHTOOL_GSTATS 0x1d
+#undef ethtool_drvinfo
+#define ethtool_drvinfo k_ethtool_drvinfo
+struct k_ethtool_drvinfo {
+ u32 cmd;
+ char driver[32];
+ char version[32];
+ char fw_version[32];
+ char bus_info[32];
+ char reserved1[32];
+ char reserved2[16];
+ u32 n_stats;
+ u32 testinfo_len;
+ u32 eedump_len;
+ u32 regdump_len;
+};
+
+struct ethtool_stats {
+ u32 cmd;
+ u32 n_stats;
+ u64 data[0];
+};
+#endif /* ETHTOOL_GSTATS */
+
+#ifndef ETHTOOL_PHYS_ID
+#define ETHTOOL_PHYS_ID 0x1c
+#endif /* ETHTOOL_PHYS_ID */
+
+#ifndef ETHTOOL_GSTRINGS
+#define ETHTOOL_GSTRINGS 0x1b
+enum ethtool_stringset {
+ ETH_SS_TEST = 0,
+ ETH_SS_STATS,
+};
+struct ethtool_gstrings {
+ u32 cmd; /* ETHTOOL_GSTRINGS */
+ u32 string_set; /* string set id e.c. ETH_SS_TEST, etc*/
+ u32 len; /* number of strings in the string set */
+ u8 data[0];
+};
+#endif /* ETHTOOL_GSTRINGS */
+
+#ifndef ETHTOOL_TEST
+#define ETHTOOL_TEST 0x1a
+enum ethtool_test_flags {
+ ETH_TEST_FL_OFFLINE = (1 << 0),
+ ETH_TEST_FL_FAILED = (1 << 1),
+};
+struct ethtool_test {
+ u32 cmd;
+ u32 flags;
+ u32 reserved;
+ u32 len;
+ u64 data[0];
+};
+#endif /* ETHTOOL_TEST */
+
+#ifndef ETHTOOL_GEEPROM
+#define ETHTOOL_GEEPROM 0xb
+#undef ETHTOOL_GREGS
+struct ethtool_eeprom {
+ u32 cmd;
+ u32 magic;
+ u32 offset;
+ u32 len;
+ u8 data[0];
+};
+
+struct ethtool_value {
+ u32 cmd;
+ u32 data;
+};
+#endif /* ETHTOOL_GEEPROM */
+
+#ifndef ETHTOOL_GLINK
+#define ETHTOOL_GLINK 0xa
+#endif /* ETHTOOL_GLINK */
+
+#ifndef ETHTOOL_GWOL
+#define ETHTOOL_GWOL 0x5
+#define ETHTOOL_SWOL 0x6
+#define SOPASS_MAX 6
+struct ethtool_wolinfo {
+ u32 cmd;
+ u32 supported;
+ u32 wolopts;
+ u8 sopass[SOPASS_MAX]; /* SecureOn(tm) password */
+};
+#endif /* ETHTOOL_GWOL */
+
+#ifndef ETHTOOL_GREGS
+#define ETHTOOL_GREGS 0x00000004 /* Get NIC registers */
+#define ethtool_regs _kc_ethtool_regs
+/* for passing big chunks of data */
+struct _kc_ethtool_regs {
+ u32 cmd;
+ u32 version; /* driver-specific, indicates different chips/revs */
+ u32 len; /* bytes */
+ u8 data[0];
+};
+#endif /* ETHTOOL_GREGS */
+
+#ifndef ETHTOOL_GMSGLVL
+#define ETHTOOL_GMSGLVL 0x00000007 /* Get driver message level */
+#endif
+#ifndef ETHTOOL_SMSGLVL
+#define ETHTOOL_SMSGLVL 0x00000008 /* Set driver msg level, priv. */
+#endif
+#ifndef ETHTOOL_NWAY_RST
+#define ETHTOOL_NWAY_RST 0x00000009 /* Restart autonegotiation, priv */
+#endif
+#ifndef ETHTOOL_GLINK
+#define ETHTOOL_GLINK 0x0000000a /* Get link status */
+#endif
+#ifndef ETHTOOL_GEEPROM
+#define ETHTOOL_GEEPROM 0x0000000b /* Get EEPROM data */
+#endif
+#ifndef ETHTOOL_SEEPROM
+#define ETHTOOL_SEEPROM 0x0000000c /* Set EEPROM data */
+#endif
+#ifndef ETHTOOL_GCOALESCE
+#define ETHTOOL_GCOALESCE 0x0000000e /* Get coalesce config */
+/* for configuring coalescing parameters of chip */
+#define ethtool_coalesce _kc_ethtool_coalesce
+struct _kc_ethtool_coalesce {
+ u32 cmd; /* ETHTOOL_{G,S}COALESCE */
+
+ /* How many usecs to delay an RX interrupt after
+ * a packet arrives. If 0, only rx_max_coalesced_frames
+ * is used.
+ */
+ u32 rx_coalesce_usecs;
+
+ /* How many packets to delay an RX interrupt after
+ * a packet arrives. If 0, only rx_coalesce_usecs is
+ * used. It is illegal to set both usecs and max frames
+ * to zero as this would cause RX interrupts to never be
+ * generated.
+ */
+ u32 rx_max_coalesced_frames;
+
+ /* Same as above two parameters, except that these values
+ * apply while an IRQ is being serviced by the host. Not
+ * all cards support this feature and the values are ignored
+ * in that case.
+ */
+ u32 rx_coalesce_usecs_irq;
+ u32 rx_max_coalesced_frames_irq;
+
+ /* How many usecs to delay a TX interrupt after
+ * a packet is sent. If 0, only tx_max_coalesced_frames
+ * is used.
+ */
+ u32 tx_coalesce_usecs;
+
+ /* How many packets to delay a TX interrupt after
+ * a packet is sent. If 0, only tx_coalesce_usecs is
+ * used. It is illegal to set both usecs and max frames
+ * to zero as this would cause TX interrupts to never be
+ * generated.
+ */
+ u32 tx_max_coalesced_frames;
+
+ /* Same as above two parameters, except that these values
+ * apply while an IRQ is being serviced by the host. Not
+ * all cards support this feature and the values are ignored
+ * in that case.
+ */
+ u32 tx_coalesce_usecs_irq;
+ u32 tx_max_coalesced_frames_irq;
+
+ /* How many usecs to delay in-memory statistics
+ * block updates. Some drivers do not have an in-memory
+ * statistic block, and in such cases this value is ignored.
+ * This value must not be zero.
+ */
+ u32 stats_block_coalesce_usecs;
+
+ /* Adaptive RX/TX coalescing is an algorithm implemented by
+ * some drivers to improve latency under low packet rates and
+ * improve throughput under high packet rates. Some drivers
+ * only implement one of RX or TX adaptive coalescing. Anything
+ * not implemented by the driver causes these values to be
+ * silently ignored.
+ */
+ u32 use_adaptive_rx_coalesce;
+ u32 use_adaptive_tx_coalesce;
+
+ /* When the packet rate (measured in packets per second)
+ * is below pkt_rate_low, the {rx,tx}_*_low parameters are
+ * used.
+ */
+ u32 pkt_rate_low;
+ u32 rx_coalesce_usecs_low;
+ u32 rx_max_coalesced_frames_low;
+ u32 tx_coalesce_usecs_low;
+ u32 tx_max_coalesced_frames_low;
+
+ /* When the packet rate is below pkt_rate_high but above
+ * pkt_rate_low (both measured in packets per second) the
+ * normal {rx,tx}_* coalescing parameters are used.
+ */
+
+ /* When the packet rate is (measured in packets per second)
+ * is above pkt_rate_high, the {rx,tx}_*_high parameters are
+ * used.
+ */
+ u32 pkt_rate_high;
+ u32 rx_coalesce_usecs_high;
+ u32 rx_max_coalesced_frames_high;
+ u32 tx_coalesce_usecs_high;
+ u32 tx_max_coalesced_frames_high;
+
+ /* How often to do adaptive coalescing packet rate sampling,
+ * measured in seconds. Must not be zero.
+ */
+ u32 rate_sample_interval;
+};
+#endif /* ETHTOOL_GCOALESCE */
+
+#ifndef ETHTOOL_SCOALESCE
+#define ETHTOOL_SCOALESCE 0x0000000f /* Set coalesce config. */
+#endif
+#ifndef ETHTOOL_GRINGPARAM
+#define ETHTOOL_GRINGPARAM 0x00000010 /* Get ring parameters */
+/* for configuring RX/TX ring parameters */
+#define ethtool_ringparam _kc_ethtool_ringparam
+struct _kc_ethtool_ringparam {
+ u32 cmd; /* ETHTOOL_{G,S}RINGPARAM */
+
+ /* Read only attributes. These indicate the maximum number
+ * of pending RX/TX ring entries the driver will allow the
+ * user to set.
+ */
+ u32 rx_max_pending;
+ u32 rx_mini_max_pending;
+ u32 rx_jumbo_max_pending;
+ u32 tx_max_pending;
+
+ /* Values changeable by the user. The valid values are
+ * in the range 1 to the "*_max_pending" counterpart above.
+ */
+ u32 rx_pending;
+ u32 rx_mini_pending;
+ u32 rx_jumbo_pending;
+ u32 tx_pending;
+};
+#endif /* ETHTOOL_GRINGPARAM */
+
+#ifndef ETHTOOL_SRINGPARAM
+#define ETHTOOL_SRINGPARAM 0x00000011 /* Set ring parameters, priv. */
+#endif
+#ifndef ETHTOOL_GPAUSEPARAM
+#define ETHTOOL_GPAUSEPARAM 0x00000012 /* Get pause parameters */
+/* for configuring link flow control parameters */
+#define ethtool_pauseparam _kc_ethtool_pauseparam
+struct _kc_ethtool_pauseparam {
+ u32 cmd; /* ETHTOOL_{G,S}PAUSEPARAM */
+
+ /* If the link is being auto-negotiated (via ethtool_cmd.autoneg
+ * being true) the user may set 'autoneg' here non-zero to have the
+ * pause parameters be auto-negotiated too. In such a case, the
+ * {rx,tx}_pause values below determine what capabilities are
+ * advertised.
+ *
+ * If 'autoneg' is zero or the link is not being auto-negotiated,
+ * then {rx,tx}_pause force the driver to use/not-use pause
+ * flow control.
+ */
+ u32 autoneg;
+ u32 rx_pause;
+ u32 tx_pause;
+};
+#endif /* ETHTOOL_GPAUSEPARAM */
+
+#ifndef ETHTOOL_SPAUSEPARAM
+#define ETHTOOL_SPAUSEPARAM 0x00000013 /* Set pause parameters. */
+#endif
+#ifndef ETHTOOL_GRXCSUM
+#define ETHTOOL_GRXCSUM 0x00000014 /* Get RX hw csum enable (ethtool_value) */
+#endif
+#ifndef ETHTOOL_SRXCSUM
+#define ETHTOOL_SRXCSUM 0x00000015 /* Set RX hw csum enable (ethtool_value) */
+#endif
+#ifndef ETHTOOL_GTXCSUM
+#define ETHTOOL_GTXCSUM 0x00000016 /* Get TX hw csum enable (ethtool_value) */
+#endif
+#ifndef ETHTOOL_STXCSUM
+#define ETHTOOL_STXCSUM 0x00000017 /* Set TX hw csum enable (ethtool_value) */
+#endif
+#ifndef ETHTOOL_GSG
+#define ETHTOOL_GSG 0x00000018 /* Get scatter-gather enable
+ * (ethtool_value) */
+#endif
+#ifndef ETHTOOL_SSG
+#define ETHTOOL_SSG 0x00000019 /* Set scatter-gather enable
+ * (ethtool_value). */
+#endif
+#ifndef ETHTOOL_TEST
+#define ETHTOOL_TEST 0x0000001a /* execute NIC self-test, priv. */
+#endif
+#ifndef ETHTOOL_GSTRINGS
+#define ETHTOOL_GSTRINGS 0x0000001b /* get specified string set */
+#endif
+#ifndef ETHTOOL_PHYS_ID
+#define ETHTOOL_PHYS_ID 0x0000001c /* identify the NIC */
+#endif
+#ifndef ETHTOOL_GSTATS
+#define ETHTOOL_GSTATS 0x0000001d /* get NIC-specific statistics */
+#endif
+#ifndef ETHTOOL_GTSO
+#define ETHTOOL_GTSO 0x0000001e /* Get TSO enable (ethtool_value) */
+#endif
+#ifndef ETHTOOL_STSO
+#define ETHTOOL_STSO 0x0000001f /* Set TSO enable (ethtool_value) */
+#endif
+
+#ifndef ETHTOOL_BUSINFO_LEN
+#define ETHTOOL_BUSINFO_LEN 32
+#endif
+
+#ifndef RHEL_RELEASE_CODE
+/* NOTE: RHEL_RELEASE_* introduced in RHEL4.5 */
+#define RHEL_RELEASE_CODE 0
+#endif
+#ifndef RHEL_RELEASE_VERSION
+#define RHEL_RELEASE_VERSION(a,b) (((a) << 8) + (b))
+#endif
+#ifndef AX_RELEASE_CODE
+#define AX_RELEASE_CODE 0
+#endif
+#ifndef AX_RELEASE_VERSION
+#define AX_RELEASE_VERSION(a,b) (((a) << 8) + (b))
+#endif
+
+/* SuSE version macro is the same as Linux kernel version */
+#ifndef SLE_VERSION
+#define SLE_VERSION(a,b,c) KERNEL_VERSION(a,b,c)
+#endif
+#ifndef SLE_VERSION_CODE
+#ifdef CONFIG_SUSE_KERNEL
+/* SLES11 GA is 2.6.27 based */
+#if ( LINUX_VERSION_CODE == KERNEL_VERSION(2,6,27) )
+#define SLE_VERSION_CODE SLE_VERSION(11,0,0)
+#elif ( LINUX_VERSION_CODE == KERNEL_VERSION(2,6,32) )
+/* SLES11 SP1 is 2.6.32 based */
+#define SLE_VERSION_CODE SLE_VERSION(11,1,0)
+#else
+#define SLE_VERSION_CODE 0
+#endif
+#else /* CONFIG_SUSE_KERNEL */
+#define SLE_VERSION_CODE 0
+#endif /* CONFIG_SUSE_KERNEL */
+#endif /* SLE_VERSION_CODE */
+
+#ifdef __KLOCWORK__
+#ifdef ARRAY_SIZE
+#undef ARRAY_SIZE
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+#endif /* __KLOCWORK__ */
+
+/*****************************************************************************/
+/* 2.4.3 => 2.4.0 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,3) )
+
+/**************************************/
+/* PCI DRIVER API */
+
+#ifndef pci_set_dma_mask
+#define pci_set_dma_mask _kc_pci_set_dma_mask
+extern int _kc_pci_set_dma_mask(struct pci_dev *dev, dma_addr_t mask);
+#endif
+
+#ifndef pci_request_regions
+#define pci_request_regions _kc_pci_request_regions
+extern int _kc_pci_request_regions(struct pci_dev *pdev, char *res_name);
+#endif
+
+#ifndef pci_release_regions
+#define pci_release_regions _kc_pci_release_regions
+extern void _kc_pci_release_regions(struct pci_dev *pdev);
+#endif
+
+/**************************************/
+/* NETWORK DRIVER API */
+
+#ifndef alloc_etherdev
+#define alloc_etherdev _kc_alloc_etherdev
+extern struct net_device * _kc_alloc_etherdev(int sizeof_priv);
+#endif
+
+#ifndef is_valid_ether_addr
+#define is_valid_ether_addr _kc_is_valid_ether_addr
+extern int _kc_is_valid_ether_addr(u8 *addr);
+#endif
+
+/**************************************/
+/* MISCELLANEOUS */
+
+#ifndef INIT_TQUEUE
+#define INIT_TQUEUE(_tq, _routine, _data) \
+ do { \
+ INIT_LIST_HEAD(&(_tq)->list); \
+ (_tq)->sync = 0; \
+ (_tq)->routine = _routine; \
+ (_tq)->data = _data; \
+ } while (0)
+#endif
+
+#endif /* 2.4.3 => 2.4.0 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,5) )
+/* Generic MII registers. */
+#define MII_BMCR 0x00 /* Basic mode control register */
+#define MII_BMSR 0x01 /* Basic mode status register */
+#define MII_PHYSID1 0x02 /* PHYS ID 1 */
+#define MII_PHYSID2 0x03 /* PHYS ID 2 */
+#define MII_ADVERTISE 0x04 /* Advertisement control reg */
+#define MII_LPA 0x05 /* Link partner ability reg */
+#define MII_EXPANSION 0x06 /* Expansion register */
+/* Basic mode control register. */
+#define BMCR_FULLDPLX 0x0100 /* Full duplex */
+#define BMCR_ANENABLE 0x1000 /* Enable auto negotiation */
+/* Basic mode status register. */
+#define BMSR_ERCAP 0x0001 /* Ext-reg capability */
+#define BMSR_ANEGCAPABLE 0x0008 /* Able to do auto-negotiation */
+#define BMSR_10HALF 0x0800 /* Can do 10mbps, half-duplex */
+#define BMSR_10FULL 0x1000 /* Can do 10mbps, full-duplex */
+#define BMSR_100HALF 0x2000 /* Can do 100mbps, half-duplex */
+#define BMSR_100FULL 0x4000 /* Can do 100mbps, full-duplex */
+/* Advertisement control register. */
+#define ADVERTISE_CSMA 0x0001 /* Only selector supported */
+#define ADVERTISE_10HALF 0x0020 /* Try for 10mbps half-duplex */
+#define ADVERTISE_10FULL 0x0040 /* Try for 10mbps full-duplex */
+#define ADVERTISE_100HALF 0x0080 /* Try for 100mbps half-duplex */
+#define ADVERTISE_100FULL 0x0100 /* Try for 100mbps full-duplex */
+#define ADVERTISE_ALL (ADVERTISE_10HALF | ADVERTISE_10FULL | \
+ ADVERTISE_100HALF | ADVERTISE_100FULL)
+/* Expansion register for auto-negotiation. */
+#define EXPANSION_ENABLENPAGE 0x0004 /* This enables npage words */
+#endif
+
+/*****************************************************************************/
+/* 2.4.6 => 2.4.3 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,6) )
+
+#ifndef pci_set_power_state
+#define pci_set_power_state _kc_pci_set_power_state
+extern int _kc_pci_set_power_state(struct pci_dev *dev, int state);
+#endif
+
+#ifndef pci_enable_wake
+#define pci_enable_wake _kc_pci_enable_wake
+extern int _kc_pci_enable_wake(struct pci_dev *pdev, u32 state, int enable);
+#endif
+
+#ifndef pci_disable_device
+#define pci_disable_device _kc_pci_disable_device
+extern void _kc_pci_disable_device(struct pci_dev *pdev);
+#endif
+
+/* PCI PM entry point syntax changed, so don't support suspend/resume */
+#undef CONFIG_PM
+
+#endif /* 2.4.6 => 2.4.3 */
+
+#ifndef HAVE_PCI_SET_MWI
+#define pci_set_mwi(X) pci_write_config_word(X, \
+ PCI_COMMAND, adapter->hw.bus.pci_cmd_word | \
+ PCI_COMMAND_INVALIDATE);
+#define pci_clear_mwi(X) pci_write_config_word(X, \
+ PCI_COMMAND, adapter->hw.bus.pci_cmd_word & \
+ ~PCI_COMMAND_INVALIDATE);
+#endif
+
+/*****************************************************************************/
+/* 2.4.10 => 2.4.9 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,10) )
+
+/**************************************/
+/* MODULE API */
+
+#ifndef MODULE_LICENSE
+ #define MODULE_LICENSE(X)
+#endif
+
+/**************************************/
+/* OTHER */
+
+#undef min
+#define min(x,y) ({ \
+ const typeof(x) _x = (x); \
+ const typeof(y) _y = (y); \
+ (void) (&_x == &_y); \
+ _x < _y ? _x : _y; })
+
+#undef max
+#define max(x,y) ({ \
+ const typeof(x) _x = (x); \
+ const typeof(y) _y = (y); \
+ (void) (&_x == &_y); \
+ _x > _y ? _x : _y; })
+
+#define min_t(type,x,y) ({ \
+ type _x = (x); \
+ type _y = (y); \
+ _x < _y ? _x : _y; })
+
+#define max_t(type,x,y) ({ \
+ type _x = (x); \
+ type _y = (y); \
+ _x > _y ? _x : _y; })
+
+#ifndef list_for_each_safe
+#define list_for_each_safe(pos, n, head) \
+ for (pos = (head)->next, n = pos->next; pos != (head); \
+ pos = n, n = pos->next)
+#endif
+
+#ifndef ____cacheline_aligned_in_smp
+#ifdef CONFIG_SMP
+#define ____cacheline_aligned_in_smp ____cacheline_aligned
+#else
+#define ____cacheline_aligned_in_smp
+#endif /* CONFIG_SMP */
+#endif
+
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,8) )
+extern int _kc_snprintf(char * buf, size_t size, const char *fmt, ...);
+#define snprintf(buf, size, fmt, args...) _kc_snprintf(buf, size, fmt, ##args)
+extern int _kc_vsnprintf(char *buf, size_t size, const char *fmt, va_list args);
+#define vsnprintf(buf, size, fmt, args) _kc_vsnprintf(buf, size, fmt, args)
+#else /* 2.4.8 => 2.4.9 */
+extern int snprintf(char * buf, size_t size, const char *fmt, ...);
+extern int vsnprintf(char *buf, size_t size, const char *fmt, va_list args);
+#endif
+#endif /* 2.4.10 -> 2.4.6 */
+
+
+/*****************************************************************************/
+/* 2.4.12 => 2.4.10 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,12) )
+#ifndef HAVE_NETIF_MSG
+#define HAVE_NETIF_MSG 1
+enum {
+ NETIF_MSG_DRV = 0x0001,
+ NETIF_MSG_PROBE = 0x0002,
+ NETIF_MSG_LINK = 0x0004,
+ NETIF_MSG_TIMER = 0x0008,
+ NETIF_MSG_IFDOWN = 0x0010,
+ NETIF_MSG_IFUP = 0x0020,
+ NETIF_MSG_RX_ERR = 0x0040,
+ NETIF_MSG_TX_ERR = 0x0080,
+ NETIF_MSG_TX_QUEUED = 0x0100,
+ NETIF_MSG_INTR = 0x0200,
+ NETIF_MSG_TX_DONE = 0x0400,
+ NETIF_MSG_RX_STATUS = 0x0800,
+ NETIF_MSG_PKTDATA = 0x1000,
+ NETIF_MSG_HW = 0x2000,
+ NETIF_MSG_WOL = 0x4000,
+};
+
+#define netif_msg_drv(p) ((p)->msg_enable & NETIF_MSG_DRV)
+#define netif_msg_probe(p) ((p)->msg_enable & NETIF_MSG_PROBE)
+#define netif_msg_link(p) ((p)->msg_enable & NETIF_MSG_LINK)
+#define netif_msg_timer(p) ((p)->msg_enable & NETIF_MSG_TIMER)
+#define netif_msg_ifdown(p) ((p)->msg_enable & NETIF_MSG_IFDOWN)
+#define netif_msg_ifup(p) ((p)->msg_enable & NETIF_MSG_IFUP)
+#define netif_msg_rx_err(p) ((p)->msg_enable & NETIF_MSG_RX_ERR)
+#define netif_msg_tx_err(p) ((p)->msg_enable & NETIF_MSG_TX_ERR)
+#define netif_msg_tx_queued(p) ((p)->msg_enable & NETIF_MSG_TX_QUEUED)
+#define netif_msg_intr(p) ((p)->msg_enable & NETIF_MSG_INTR)
+#define netif_msg_tx_done(p) ((p)->msg_enable & NETIF_MSG_TX_DONE)
+#define netif_msg_rx_status(p) ((p)->msg_enable & NETIF_MSG_RX_STATUS)
+#define netif_msg_pktdata(p) ((p)->msg_enable & NETIF_MSG_PKTDATA)
+#endif /* !HAVE_NETIF_MSG */
+#endif /* 2.4.12 => 2.4.10 */
+
+/*****************************************************************************/
+/* 2.4.13 => 2.4.12 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,13) )
+
+/**************************************/
+/* PCI DMA MAPPING */
+
+#ifndef virt_to_page
+ #define virt_to_page(v) (mem_map + (virt_to_phys(v) >> PAGE_SHIFT))
+#endif
+
+#ifndef pci_map_page
+#define pci_map_page _kc_pci_map_page
+extern u64 _kc_pci_map_page(struct pci_dev *dev, struct page *page, unsigned long offset, size_t size, int direction);
+#endif
+
+#ifndef pci_unmap_page
+#define pci_unmap_page _kc_pci_unmap_page
+extern void _kc_pci_unmap_page(struct pci_dev *dev, u64 dma_addr, size_t size, int direction);
+#endif
+
+/* pci_set_dma_mask takes dma_addr_t, which is only 32-bits prior to 2.4.13 */
+
+#undef DMA_32BIT_MASK
+#define DMA_32BIT_MASK 0xffffffff
+#undef DMA_64BIT_MASK
+#define DMA_64BIT_MASK 0xffffffff
+
+/**************************************/
+/* OTHER */
+
+#ifndef cpu_relax
+#define cpu_relax() rep_nop()
+#endif
+
+struct vlan_ethhdr {
+ unsigned char h_dest[ETH_ALEN];
+ unsigned char h_source[ETH_ALEN];
+ unsigned short h_vlan_proto;
+ unsigned short h_vlan_TCI;
+ unsigned short h_vlan_encapsulated_proto;
+};
+#endif /* 2.4.13 => 2.4.12 */
+
+/*****************************************************************************/
+/* 2.4.17 => 2.4.12 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,17) )
+
+#ifndef __devexit_p
+ #define __devexit_p(x) &(x)
+#endif
+
+#endif /* 2.4.17 => 2.4.13 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,18) )
+#define NETIF_MSG_HW 0x2000
+#define NETIF_MSG_WOL 0x4000
+
+#ifndef netif_msg_hw
+#define netif_msg_hw(p) ((p)->msg_enable & NETIF_MSG_HW)
+#endif
+#ifndef netif_msg_wol
+#define netif_msg_wol(p) ((p)->msg_enable & NETIF_MSG_WOL)
+#endif
+#endif /* 2.4.18 */
+
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* 2.4.20 => 2.4.19 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20) )
+
+/* we won't support NAPI on less than 2.4.20 */
+#ifdef NAPI
+#undef NAPI
+#undef CONFIG_IXGBE_NAPI
+#endif
+
+#endif /* 2.4.20 => 2.4.19 */
+
+/*****************************************************************************/
+/* 2.4.22 => 2.4.17 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,22) )
+#define pci_name(x) ((x)->slot_name)
+#endif
+
+/*****************************************************************************/
+/* 2.4.22 => 2.4.17 */
+
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,22) )
+#ifndef IXGBE_NO_LRO
+/* Don't enable LRO for these legacy kernels */
+#define IXGBE_NO_LRO
+#endif
+#endif
+
+/*****************************************************************************/
+/*****************************************************************************/
+/* 2.4.23 => 2.4.22 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,23) )
+/*****************************************************************************/
+#ifdef NAPI
+#ifndef netif_poll_disable
+#define netif_poll_disable(x) _kc_netif_poll_disable(x)
+static inline void _kc_netif_poll_disable(struct net_device *netdev)
+{
+ while (test_and_set_bit(__LINK_STATE_RX_SCHED, &netdev->state)) {
+ /* No hurry */
+ current->state = TASK_INTERRUPTIBLE;
+ schedule_timeout(1);
+ }
+}
+#endif
+#ifndef netif_poll_enable
+#define netif_poll_enable(x) _kc_netif_poll_enable(x)
+static inline void _kc_netif_poll_enable(struct net_device *netdev)
+{
+ clear_bit(__LINK_STATE_RX_SCHED, &netdev->state);
+}
+#endif
+#endif /* NAPI */
+#ifndef netif_tx_disable
+#define netif_tx_disable(x) _kc_netif_tx_disable(x)
+static inline void _kc_netif_tx_disable(struct net_device *dev)
+{
+ spin_lock_bh(&dev->xmit_lock);
+ netif_stop_queue(dev);
+ spin_unlock_bh(&dev->xmit_lock);
+}
+#endif
+#else /* 2.4.23 => 2.4.22 */
+#define HAVE_SCTP
+#endif /* 2.4.23 => 2.4.22 */
+
+/*****************************************************************************/
+/* 2.6.4 => 2.6.0 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,25) || \
+ ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) && \
+ LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) ) )
+#define ETHTOOL_OPS_COMPAT
+#endif /* 2.6.4 => 2.6.0 */
+
+/*****************************************************************************/
+/* 2.5.71 => 2.4.x */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,5,71) )
+#define sk_protocol protocol
+#define pci_get_device pci_find_device
+#endif /* 2.5.70 => 2.4.x */
+
+/*****************************************************************************/
+/* < 2.4.27 or 2.6.0 <= 2.6.5 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,27) || \
+ ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) && \
+ LINUX_VERSION_CODE < KERNEL_VERSION(2,6,5) ) )
+
+#ifndef netif_msg_init
+#define netif_msg_init _kc_netif_msg_init
+static inline u32 _kc_netif_msg_init(int debug_value, int default_msg_enable_bits)
+{
+ /* use default */
+ if (debug_value < 0 || debug_value >= (sizeof(u32) * 8))
+ return default_msg_enable_bits;
+ if (debug_value == 0) /* no output */
+ return 0;
+ /* set low N bits */
+ return (1 << debug_value) -1;
+}
+#endif
+
+#endif /* < 2.4.27 or 2.6.0 <= 2.6.5 */
+/*****************************************************************************/
+#if (( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,27) ) || \
+ (( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) ) && \
+ ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,3) )))
+#define netdev_priv(x) x->priv
+#endif
+
+/*****************************************************************************/
+/* <= 2.5.0 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) )
+#include <linux/rtnetlink.h>
+#undef pci_register_driver
+#define pci_register_driver pci_module_init
+
+/*
+ * Most of the dma compat code is copied/modifed from the 2.4.37
+ * /include/linux/libata-compat.h header file
+ */
+/* These definitions mirror those in pci.h, so they can be used
+ * interchangeably with their PCI_ counterparts */
+enum dma_data_direction {
+ DMA_BIDIRECTIONAL = 0,
+ DMA_TO_DEVICE = 1,
+ DMA_FROM_DEVICE = 2,
+ DMA_NONE = 3,
+};
+
+struct device {
+ struct pci_dev pdev;
+};
+
+static inline struct pci_dev *to_pci_dev (struct device *dev)
+{
+ return (struct pci_dev *) dev;
+}
+static inline struct device *pci_dev_to_dev(struct pci_dev *pdev)
+{
+ return (struct device *) pdev;
+}
+
+#define pdev_printk(lvl, pdev, fmt, args...) \
+ printk("%s %s: " fmt, lvl, pci_name(pdev), ## args)
+#define dev_err(dev, fmt, args...) \
+ pdev_printk(KERN_ERR, to_pci_dev(dev), fmt, ## args)
+#define dev_info(dev, fmt, args...) \
+ pdev_printk(KERN_INFO, to_pci_dev(dev), fmt, ## args)
+#define dev_warn(dev, fmt, args...) \
+ pdev_printk(KERN_WARNING, to_pci_dev(dev), fmt, ## args)
+
+/* NOTE: dangerous! we ignore the 'gfp' argument */
+#define dma_alloc_coherent(dev,sz,dma,gfp) \
+ pci_alloc_consistent(to_pci_dev(dev),(sz),(dma))
+#define dma_free_coherent(dev,sz,addr,dma_addr) \
+ pci_free_consistent(to_pci_dev(dev),(sz),(addr),(dma_addr))
+
+#define dma_map_page(dev,a,b,c,d) \
+ pci_map_page(to_pci_dev(dev),(a),(b),(c),(d))
+#define dma_unmap_page(dev,a,b,c) \
+ pci_unmap_page(to_pci_dev(dev),(a),(b),(c))
+
+#define dma_map_single(dev,a,b,c) \
+ pci_map_single(to_pci_dev(dev),(a),(b),(c))
+#define dma_unmap_single(dev,a,b,c) \
+ pci_unmap_single(to_pci_dev(dev),(a),(b),(c))
+
+#define dma_sync_single(dev,a,b,c) \
+ pci_dma_sync_single(to_pci_dev(dev),(a),(b),(c))
+
+/* for range just sync everything, that's all the pci API can do */
+#define dma_sync_single_range(dev,addr,off,sz,dir) \
+ pci_dma_sync_single(to_pci_dev(dev),(addr),(off)+(sz),(dir))
+
+#define dma_set_mask(dev,mask) \
+ pci_set_dma_mask(to_pci_dev(dev),(mask))
+
+/* hlist_* code - double linked lists */
+struct hlist_head {
+ struct hlist_node *first;
+};
+
+struct hlist_node {
+ struct hlist_node *next, **pprev;
+};
+
+static inline void __hlist_del(struct hlist_node *n)
+{
+ struct hlist_node *next = n->next;
+ struct hlist_node **pprev = n->pprev;
+ *pprev = next;
+ if (next)
+ next->pprev = pprev;
+}
+
+static inline void hlist_del(struct hlist_node *n)
+{
+ __hlist_del(n);
+ n->next = NULL;
+ n->pprev = NULL;
+}
+
+static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h)
+{
+ struct hlist_node *first = h->first;
+ n->next = first;
+ if (first)
+ first->pprev = &n->next;
+ h->first = n;
+ n->pprev = &h->first;
+}
+
+static inline int hlist_empty(const struct hlist_head *h)
+{
+ return !h->first;
+}
+#define HLIST_HEAD_INIT { .first = NULL }
+#define HLIST_HEAD(name) struct hlist_head name = { .first = NULL }
+#define INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL)
+static inline void INIT_HLIST_NODE(struct hlist_node *h)
+{
+ h->next = NULL;
+ h->pprev = NULL;
+}
+#define hlist_entry(ptr, type, member) container_of(ptr,type,member)
+
+#define hlist_for_each_entry(tpos, pos, head, member) \
+ for (pos = (head)->first; \
+ pos && ({ prefetch(pos->next); 1;}) && \
+ ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
+ pos = pos->next)
+
+#define hlist_for_each_entry_safe(tpos, pos, n, head, member) \
+ for (pos = (head)->first; \
+ pos && ({ n = pos->next; 1; }) && \
+ ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
+ pos = n)
+
+#ifndef might_sleep
+#define might_sleep()
+#endif
+#else
+static inline struct device *pci_dev_to_dev(struct pci_dev *pdev)
+{
+ return &pdev->dev;
+}
+#endif /* <= 2.5.0 */
+
+/*****************************************************************************/
+/* 2.5.28 => 2.4.23 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,5,28) )
+
+static inline void _kc_synchronize_irq(void)
+{
+ synchronize_irq();
+}
+#undef synchronize_irq
+#define synchronize_irq(X) _kc_synchronize_irq()
+
+#include <linux/tqueue.h>
+#define work_struct tq_struct
+#undef INIT_WORK
+#define INIT_WORK(a,b) INIT_TQUEUE(a,(void (*)(void *))b,a)
+#undef container_of
+#define container_of list_entry
+#define schedule_work schedule_task
+#define flush_scheduled_work flush_scheduled_tasks
+#define cancel_work_sync(x) flush_scheduled_work()
+
+#endif /* 2.5.28 => 2.4.17 */
+
+/*****************************************************************************/
+/* 2.6.0 => 2.5.28 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) )
+#undef get_cpu
+#define get_cpu() smp_processor_id()
+#undef put_cpu
+#define put_cpu() do { } while(0)
+#define MODULE_INFO(version, _version)
+#ifndef CONFIG_E1000_DISABLE_PACKET_SPLIT
+#define CONFIG_E1000_DISABLE_PACKET_SPLIT 1
+#endif
+#define CONFIG_IGB_DISABLE_PACKET_SPLIT 1
+
+#define dma_set_coherent_mask(dev,mask) 1
+
+#undef dev_put
+#define dev_put(dev) __dev_put(dev)
+
+#ifndef skb_fill_page_desc
+#define skb_fill_page_desc _kc_skb_fill_page_desc
+extern void _kc_skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page, int off, int size);
+#endif
+
+#undef ALIGN
+#define ALIGN(x,a) (((x)+(a)-1)&~((a)-1))
+
+#ifndef page_count
+#define page_count(p) atomic_read(&(p)->count)
+#endif
+
+#ifdef MAX_NUMNODES
+#undef MAX_NUMNODES
+#endif
+#define MAX_NUMNODES 1
+
+/* find_first_bit and find_next bit are not defined for most
+ * 2.4 kernels (except for the redhat 2.4.21 kernels
+ */
+#include <linux/bitops.h>
+#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG)
+#undef find_next_bit
+#define find_next_bit _kc_find_next_bit
+extern unsigned long _kc_find_next_bit(const unsigned long *addr,
+ unsigned long size,
+ unsigned long offset);
+#define find_first_bit(addr, size) find_next_bit((addr), (size), 0)
+
+
+#ifndef netdev_name
+static inline const char *_kc_netdev_name(const struct net_device *dev)
+{
+ if (strchr(dev->name, '%'))
+ return "(unregistered net_device)";
+ return dev->name;
+}
+#define netdev_name(netdev) _kc_netdev_name(netdev)
+#endif /* netdev_name */
+
+#ifndef strlcpy
+#define strlcpy _kc_strlcpy
+extern size_t _kc_strlcpy(char *dest, const char *src, size_t size);
+#endif /* strlcpy */
+
+#endif /* 2.6.0 => 2.5.28 */
+
+/*****************************************************************************/
+/* 2.6.4 => 2.6.0 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) )
+#define MODULE_VERSION(_version) MODULE_INFO(version, _version)
+#endif /* 2.6.4 => 2.6.0 */
+
+/*****************************************************************************/
+/* 2.6.5 => 2.6.0 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,5) )
+#define dma_sync_single_for_cpu dma_sync_single
+#define dma_sync_single_for_device dma_sync_single
+#define dma_sync_single_range_for_cpu dma_sync_single_range
+#define dma_sync_single_range_for_device dma_sync_single_range
+#ifndef pci_dma_mapping_error
+#define pci_dma_mapping_error _kc_pci_dma_mapping_error
+static inline int _kc_pci_dma_mapping_error(dma_addr_t dma_addr)
+{
+ return dma_addr == 0;
+}
+#endif
+#endif /* 2.6.5 => 2.6.0 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) )
+extern int _kc_scnprintf(char * buf, size_t size, const char *fmt, ...);
+#define scnprintf(buf, size, fmt, args...) _kc_scnprintf(buf, size, fmt, ##args)
+#endif /* < 2.6.4 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,6) )
+/* taken from 2.6 include/linux/bitmap.h */
+#undef bitmap_zero
+#define bitmap_zero _kc_bitmap_zero
+static inline void _kc_bitmap_zero(unsigned long *dst, int nbits)
+{
+ if (nbits <= BITS_PER_LONG)
+ *dst = 0UL;
+ else {
+ int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
+ memset(dst, 0, len);
+ }
+}
+#define random_ether_addr _kc_random_ether_addr
+static inline void _kc_random_ether_addr(u8 *addr)
+{
+ get_random_bytes(addr, ETH_ALEN);
+ addr[0] &= 0xfe; /* clear multicast */
+ addr[0] |= 0x02; /* set local assignment */
+}
+#define page_to_nid(x) 0
+
+#endif /* < 2.6.6 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,7) )
+#undef if_mii
+#define if_mii _kc_if_mii
+static inline struct mii_ioctl_data *_kc_if_mii(struct ifreq *rq)
+{
+ return (struct mii_ioctl_data *) &rq->ifr_ifru;
+}
+
+#ifndef __force
+#define __force
+#endif
+#endif /* < 2.6.7 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,8) )
+#ifndef PCI_EXP_DEVCTL
+#define PCI_EXP_DEVCTL 8
+#endif
+#ifndef PCI_EXP_DEVCTL_CERE
+#define PCI_EXP_DEVCTL_CERE 0x0001
+#endif
+#define msleep(x) do { set_current_state(TASK_UNINTERRUPTIBLE); \
+ schedule_timeout((x * HZ)/1000 + 2); \
+ } while (0)
+
+#endif /* < 2.6.8 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,9))
+#include <net/dsfield.h>
+#define __iomem
+
+#ifndef kcalloc
+#define kcalloc(n, size, flags) _kc_kzalloc(((n) * (size)), flags)
+extern void *_kc_kzalloc(size_t size, int flags);
+#endif
+#define MSEC_PER_SEC 1000L
+static inline unsigned int _kc_jiffies_to_msecs(const unsigned long j)
+{
+#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ)
+ return (MSEC_PER_SEC / HZ) * j;
+#elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC)
+ return (j + (HZ / MSEC_PER_SEC) - 1)/(HZ / MSEC_PER_SEC);
+#else
+ return (j * MSEC_PER_SEC) / HZ;
+#endif
+}
+static inline unsigned long _kc_msecs_to_jiffies(const unsigned int m)
+{
+ if (m > _kc_jiffies_to_msecs(MAX_JIFFY_OFFSET))
+ return MAX_JIFFY_OFFSET;
+#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ)
+ return (m + (MSEC_PER_SEC / HZ) - 1) / (MSEC_PER_SEC / HZ);
+#elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC)
+ return m * (HZ / MSEC_PER_SEC);
+#else
+ return (m * HZ + MSEC_PER_SEC - 1) / MSEC_PER_SEC;
+#endif
+}
+
+#define msleep_interruptible _kc_msleep_interruptible
+static inline unsigned long _kc_msleep_interruptible(unsigned int msecs)
+{
+ unsigned long timeout = _kc_msecs_to_jiffies(msecs) + 1;
+
+ while (timeout && !signal_pending(current)) {
+ __set_current_state(TASK_INTERRUPTIBLE);
+ timeout = schedule_timeout(timeout);
+ }
+ return _kc_jiffies_to_msecs(timeout);
+}
+
+/* Basic mode control register. */
+#define BMCR_SPEED1000 0x0040 /* MSB of Speed (1000) */
+
+#ifndef __le16
+#define __le16 u16
+#endif
+#ifndef __le32
+#define __le32 u32
+#endif
+#ifndef __le64
+#define __le64 u64
+#endif
+#ifndef __be16
+#define __be16 u16
+#endif
+#ifndef __be32
+#define __be32 u32
+#endif
+#ifndef __be64
+#define __be64 u64
+#endif
+
+static inline struct vlan_ethhdr *vlan_eth_hdr(const struct sk_buff *skb)
+{
+ return (struct vlan_ethhdr *)skb->mac.raw;
+}
+
+/* Wake-On-Lan options. */
+#define WAKE_PHY (1 << 0)
+#define WAKE_UCAST (1 << 1)
+#define WAKE_MCAST (1 << 2)
+#define WAKE_BCAST (1 << 3)
+#define WAKE_ARP (1 << 4)
+#define WAKE_MAGIC (1 << 5)
+#define WAKE_MAGICSECURE (1 << 6) /* only meaningful if WAKE_MAGIC */
+
+#define skb_header_pointer _kc_skb_header_pointer
+static inline void *_kc_skb_header_pointer(const struct sk_buff *skb,
+ int offset, int len, void *buffer)
+{
+ int hlen = skb_headlen(skb);
+
+ if (hlen - offset >= len)
+ return skb->data + offset;
+
+#ifdef MAX_SKB_FRAGS
+ if (skb_copy_bits(skb, offset, buffer, len) < 0)
+ return NULL;
+
+ return buffer;
+#else
+ return NULL;
+#endif
+
+#ifndef NETDEV_TX_OK
+#define NETDEV_TX_OK 0
+#endif
+#ifndef NETDEV_TX_BUSY
+#define NETDEV_TX_BUSY 1
+#endif
+#ifndef NETDEV_TX_LOCKED
+#define NETDEV_TX_LOCKED -1
+#endif
+}
+
+#ifndef __bitwise
+#define __bitwise
+#endif
+#endif /* < 2.6.9 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,10) )
+#ifdef module_param_array_named
+#undef module_param_array_named
+#define module_param_array_named(name, array, type, nump, perm) \
+ static struct kparam_array __param_arr_##name \
+ = { ARRAY_SIZE(array), nump, param_set_##type, param_get_##type, \
+ sizeof(array[0]), array }; \
+ module_param_call(name, param_array_set, param_array_get, \
+ &__param_arr_##name, perm)
+#endif /* module_param_array_named */
+/*
+ * num_online is broken for all < 2.6.10 kernels. This is needed to support
+ * Node module parameter of ixgbe.
+ */
+#undef num_online_nodes
+#define num_online_nodes(n) 1
+extern DECLARE_BITMAP(_kcompat_node_online_map, MAX_NUMNODES);
+#undef node_online_map
+#define node_online_map _kcompat_node_online_map
+#endif /* < 2.6.10 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,11) )
+#define PCI_D0 0
+#define PCI_D1 1
+#define PCI_D2 2
+#define PCI_D3hot 3
+#define PCI_D3cold 4
+typedef int pci_power_t;
+#define pci_choose_state(pdev,state) state
+#define PMSG_SUSPEND 3
+#define PCI_EXP_LNKCTL 16
+
+#undef NETIF_F_LLTX
+
+#ifndef ARCH_HAS_PREFETCH
+#define prefetch(X)
+#endif
+
+#ifndef NET_IP_ALIGN
+#define NET_IP_ALIGN 2
+#endif
+
+#define KC_USEC_PER_SEC 1000000L
+#define usecs_to_jiffies _kc_usecs_to_jiffies
+static inline unsigned int _kc_jiffies_to_usecs(const unsigned long j)
+{
+#if HZ <= KC_USEC_PER_SEC && !(KC_USEC_PER_SEC % HZ)
+ return (KC_USEC_PER_SEC / HZ) * j;
+#elif HZ > KC_USEC_PER_SEC && !(HZ % KC_USEC_PER_SEC)
+ return (j + (HZ / KC_USEC_PER_SEC) - 1)/(HZ / KC_USEC_PER_SEC);
+#else
+ return (j * KC_USEC_PER_SEC) / HZ;
+#endif
+}
+static inline unsigned long _kc_usecs_to_jiffies(const unsigned int m)
+{
+ if (m > _kc_jiffies_to_usecs(MAX_JIFFY_OFFSET))
+ return MAX_JIFFY_OFFSET;
+#if HZ <= KC_USEC_PER_SEC && !(KC_USEC_PER_SEC % HZ)
+ return (m + (KC_USEC_PER_SEC / HZ) - 1) / (KC_USEC_PER_SEC / HZ);
+#elif HZ > KC_USEC_PER_SEC && !(HZ % KC_USEC_PER_SEC)
+ return m * (HZ / KC_USEC_PER_SEC);
+#else
+ return (m * HZ + KC_USEC_PER_SEC - 1) / KC_USEC_PER_SEC;
+#endif
+}
+#endif /* < 2.6.11 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,12) )
+#include <linux/reboot.h>
+#define USE_REBOOT_NOTIFIER
+
+/* Generic MII registers. */
+#define MII_CTRL1000 0x09 /* 1000BASE-T control */
+#define MII_STAT1000 0x0a /* 1000BASE-T status */
+/* Advertisement control register. */
+#define ADVERTISE_PAUSE_CAP 0x0400 /* Try for pause */
+#define ADVERTISE_PAUSE_ASYM 0x0800 /* Try for asymmetric pause */
+/* 1000BASE-T Control register */
+#define ADVERTISE_1000FULL 0x0200 /* Advertise 1000BASE-T full duplex */
+#ifndef is_zero_ether_addr
+#define is_zero_ether_addr _kc_is_zero_ether_addr
+static inline int _kc_is_zero_ether_addr(const u8 *addr)
+{
+ return !(addr[0] | addr[1] | addr[2] | addr[3] | addr[4] | addr[5]);
+}
+#endif /* is_zero_ether_addr */
+#ifndef is_multicast_ether_addr
+#define is_multicast_ether_addr _kc_is_multicast_ether_addr
+static inline int _kc_is_multicast_ether_addr(const u8 *addr)
+{
+ return addr[0] & 0x01;
+}
+#endif /* is_multicast_ether_addr */
+#endif /* < 2.6.12 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,13) )
+#ifndef kstrdup
+#define kstrdup _kc_kstrdup
+extern char *_kc_kstrdup(const char *s, unsigned int gfp);
+#endif
+#endif /* < 2.6.13 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14) )
+#define pm_message_t u32
+#ifndef kzalloc
+#define kzalloc _kc_kzalloc
+extern void *_kc_kzalloc(size_t size, int flags);
+#endif
+
+/* Generic MII registers. */
+#define MII_ESTATUS 0x0f /* Extended Status */
+/* Basic mode status register. */
+#define BMSR_ESTATEN 0x0100 /* Extended Status in R15 */
+/* Extended status register. */
+#define ESTATUS_1000_TFULL 0x2000 /* Can do 1000BT Full */
+#define ESTATUS_1000_THALF 0x1000 /* Can do 1000BT Half */
+
+#define ADVERTISED_Pause (1 << 13)
+#define ADVERTISED_Asym_Pause (1 << 14)
+
+#if (!(RHEL_RELEASE_CODE && \
+ (RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(4,3)) && \
+ (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(5,0))))
+#if ((LINUX_VERSION_CODE == KERNEL_VERSION(2,6,9)) && !defined(gfp_t))
+#define gfp_t unsigned
+#else
+typedef unsigned gfp_t;
+#endif
+#endif /* !RHEL4.3->RHEL5.0 */
+
+#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,9) )
+#ifdef CONFIG_X86_64
+#define dma_sync_single_range_for_cpu(dev, dma_handle, offset, size, dir) \
+ dma_sync_single_for_cpu(dev, dma_handle, size, dir)
+#define dma_sync_single_range_for_device(dev, dma_handle, offset, size, dir) \
+ dma_sync_single_for_device(dev, dma_handle, size, dir)
+#endif
+#endif
+#endif /* < 2.6.14 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,15) )
+#ifndef vmalloc_node
+#define vmalloc_node(a,b) vmalloc(a)
+#endif /* vmalloc_node*/
+
+#define setup_timer(_timer, _function, _data) \
+do { \
+ (_timer)->function = _function; \
+ (_timer)->data = _data; \
+ init_timer(_timer); \
+} while (0)
+#ifndef device_can_wakeup
+#define device_can_wakeup(dev) (1)
+#endif
+#ifndef device_set_wakeup_enable
+#define device_set_wakeup_enable(dev, val) do{}while(0)
+#endif
+#ifndef device_init_wakeup
+#define device_init_wakeup(dev,val) do {} while (0)
+#endif
+static inline unsigned _kc_compare_ether_addr(const u8 *addr1, const u8 *addr2)
+{
+ const u16 *a = (const u16 *) addr1;
+ const u16 *b = (const u16 *) addr2;
+
+ return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) != 0;
+}
+#undef compare_ether_addr
+#define compare_ether_addr(addr1, addr2) _kc_compare_ether_addr(addr1, addr2)
+#endif /* < 2.6.15 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16) )
+#undef DEFINE_MUTEX
+#define DEFINE_MUTEX(x) DECLARE_MUTEX(x)
+#define mutex_lock(x) down_interruptible(x)
+#define mutex_unlock(x) up(x)
+
+#ifndef ____cacheline_internodealigned_in_smp
+#ifdef CONFIG_SMP
+#define ____cacheline_internodealigned_in_smp ____cacheline_aligned_in_smp
+#else
+#define ____cacheline_internodealigned_in_smp
+#endif /* CONFIG_SMP */
+#endif /* ____cacheline_internodealigned_in_smp */
+#undef HAVE_PCI_ERS
+#else /* 2.6.16 and above */
+#undef HAVE_PCI_ERS
+#define HAVE_PCI_ERS
+#endif /* < 2.6.16 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) )
+#ifndef first_online_node
+#define first_online_node 0
+#endif
+#ifndef NET_SKB_PAD
+#define NET_SKB_PAD 16
+#endif
+#endif /* < 2.6.17 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) )
+
+#ifndef IRQ_HANDLED
+#define irqreturn_t void
+#define IRQ_HANDLED
+#define IRQ_NONE
+#endif
+
+#ifndef IRQF_PROBE_SHARED
+#ifdef SA_PROBEIRQ
+#define IRQF_PROBE_SHARED SA_PROBEIRQ
+#else
+#define IRQF_PROBE_SHARED 0
+#endif
+#endif
+
+#ifndef IRQF_SHARED
+#define IRQF_SHARED SA_SHIRQ
+#endif
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+#ifndef FIELD_SIZEOF
+#define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
+#endif
+
+#ifndef skb_is_gso
+#ifdef NETIF_F_TSO
+#define skb_is_gso _kc_skb_is_gso
+static inline int _kc_skb_is_gso(const struct sk_buff *skb)
+{
+ return skb_shinfo(skb)->gso_size;
+}
+#else
+#define skb_is_gso(a) 0
+#endif
+#endif
+
+#ifndef resource_size_t
+#define resource_size_t unsigned long
+#endif
+
+#ifdef skb_pad
+#undef skb_pad
+#endif
+#define skb_pad(x,y) _kc_skb_pad(x, y)
+int _kc_skb_pad(struct sk_buff *skb, int pad);
+#ifdef skb_padto
+#undef skb_padto
+#endif
+#define skb_padto(x,y) _kc_skb_padto(x, y)
+static inline int _kc_skb_padto(struct sk_buff *skb, unsigned int len)
+{
+ unsigned int size = skb->len;
+ if(likely(size >= len))
+ return 0;
+ return _kc_skb_pad(skb, len - size);
+}
+
+#ifndef DECLARE_PCI_UNMAP_ADDR
+#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \
+ dma_addr_t ADDR_NAME
+#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) \
+ u32 LEN_NAME
+#define pci_unmap_addr(PTR, ADDR_NAME) \
+ ((PTR)->ADDR_NAME)
+#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) \
+ (((PTR)->ADDR_NAME) = (VAL))
+#define pci_unmap_len(PTR, LEN_NAME) \
+ ((PTR)->LEN_NAME)
+#define pci_unmap_len_set(PTR, LEN_NAME, VAL) \
+ (((PTR)->LEN_NAME) = (VAL))
+#endif /* DECLARE_PCI_UNMAP_ADDR */
+#endif /* < 2.6.18 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) )
+
+#ifndef DIV_ROUND_UP
+#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
+#endif
+#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0) )
+#if (!((RHEL_RELEASE_CODE && \
+ ((RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(4,4) && \
+ RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(5,0)) || \
+ (RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,0)))) || \
+ (AX_RELEASE_CODE && AX_RELEASE_CODE > AX_RELEASE_VERSION(3,0))))
+typedef irqreturn_t (*irq_handler_t)(int, void*, struct pt_regs *);
+#endif
+#if (RHEL_RELEASE_CODE && RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(6,0))
+#undef CONFIG_INET_LRO
+#undef CONFIG_INET_LRO_MODULE
+#undef CONFIG_FCOE
+#undef CONFIG_FCOE_MODULE
+#endif
+typedef irqreturn_t (*new_handler_t)(int, void*);
+static inline irqreturn_t _kc_request_irq(unsigned int irq, new_handler_t handler, unsigned long flags, const char *devname, void *dev_id)
+#else /* 2.4.x */
+typedef void (*irq_handler_t)(int, void*, struct pt_regs *);
+typedef void (*new_handler_t)(int, void*);
+static inline int _kc_request_irq(unsigned int irq, new_handler_t handler, unsigned long flags, const char *devname, void *dev_id)
+#endif /* >= 2.5.x */
+{
+ irq_handler_t new_handler = (irq_handler_t) handler;
+ return request_irq(irq, new_handler, flags, devname, dev_id);
+}
+
+#undef request_irq
+#define request_irq(irq, handler, flags, devname, dev_id) _kc_request_irq((irq), (handler), (flags), (devname), (dev_id))
+
+#define irq_handler_t new_handler_t
+/* pci_restore_state and pci_save_state handles MSI/PCIE from 2.6.19 */
+#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,4)))
+#define PCIE_CONFIG_SPACE_LEN 256
+#define PCI_CONFIG_SPACE_LEN 64
+#define PCIE_LINK_STATUS 0x12
+#define pci_config_space_ich8lan() do {} while(0)
+#undef pci_save_state
+extern int _kc_pci_save_state(struct pci_dev *);
+#define pci_save_state(pdev) _kc_pci_save_state(pdev)
+#undef pci_restore_state
+extern void _kc_pci_restore_state(struct pci_dev *);
+#define pci_restore_state(pdev) _kc_pci_restore_state(pdev)
+#endif /* !(RHEL_RELEASE_CODE >= RHEL 5.4) */
+
+#ifdef HAVE_PCI_ERS
+#undef free_netdev
+extern void _kc_free_netdev(struct net_device *);
+#define free_netdev(netdev) _kc_free_netdev(netdev)
+#endif
+static inline int pci_enable_pcie_error_reporting(struct pci_dev *dev)
+{
+ return 0;
+}
+#define pci_disable_pcie_error_reporting(dev) do {} while (0)
+#define pci_cleanup_aer_uncorrect_error_status(dev) do {} while (0)
+
+extern void *_kc_kmemdup(const void *src, size_t len, unsigned gfp);
+#define kmemdup(src, len, gfp) _kc_kmemdup(src, len, gfp)
+#ifndef bool
+#define bool _Bool
+#define true 1
+#define false 0
+#endif
+#else /* 2.6.19 */
+#include <linux/aer.h>
+#include <linux/string.h>
+#endif /* < 2.6.19 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20) )
+#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,28) )
+#undef INIT_WORK
+#define INIT_WORK(_work, _func) \
+do { \
+ INIT_LIST_HEAD(&(_work)->entry); \
+ (_work)->pending = 0; \
+ (_work)->func = (void (*)(void *))_func; \
+ (_work)->data = _work; \
+ init_timer(&(_work)->timer); \
+} while (0)
+#endif
+
+#ifndef PCI_VDEVICE
+#define PCI_VDEVICE(ven, dev) \
+ PCI_VENDOR_ID_##ven, (dev), \
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0
+#endif
+
+#ifndef round_jiffies
+#define round_jiffies(x) x
+#endif
+
+#define csum_offset csum
+
+#define HAVE_EARLY_VMALLOC_NODE
+#define dev_to_node(dev) -1
+#undef set_dev_node
+/* remove compiler warning with b=b, for unused variable */
+#define set_dev_node(a, b) do { (b) = (b); } while(0)
+
+#if (!(RHEL_RELEASE_CODE && \
+ (((RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(4,7)) && \
+ (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(5,0))) || \
+ (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,6)))) && \
+ !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(10,2,0)))
+typedef __u16 __bitwise __sum16;
+typedef __u32 __bitwise __wsum;
+#endif
+
+#if (!(RHEL_RELEASE_CODE && \
+ (((RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(4,7)) && \
+ (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(5,0))) || \
+ (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,4)))) && \
+ !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(10,2,0)))
+static inline __wsum csum_unfold(__sum16 n)
+{
+ return (__force __wsum)n;
+}
+#endif
+
+#else /* < 2.6.20 */
+#define HAVE_DEVICE_NUMA_NODE
+#endif /* < 2.6.20 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21) )
+#define to_net_dev(class) container_of(class, struct net_device, class_dev)
+#define NETDEV_CLASS_DEV
+#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,5)))
+#define vlan_group_get_device(vg, id) (vg->vlan_devices[id])
+#define vlan_group_set_device(vg, id, dev) \
+ do { \
+ if (vg) vg->vlan_devices[id] = dev; \
+ } while (0)
+#endif /* !(RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,5)) */
+#define pci_channel_offline(pdev) (pdev->error_state && \
+ pdev->error_state != pci_channel_io_normal)
+#define pci_request_selected_regions(pdev, bars, name) \
+ pci_request_regions(pdev, name)
+#define pci_release_selected_regions(pdev, bars) pci_release_regions(pdev);
+#endif /* < 2.6.21 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) )
+#define tcp_hdr(skb) (skb->h.th)
+#define tcp_hdrlen(skb) (skb->h.th->doff << 2)
+#define skb_transport_offset(skb) (skb->h.raw - skb->data)
+#define skb_transport_header(skb) (skb->h.raw)
+#define ipv6_hdr(skb) (skb->nh.ipv6h)
+#define ip_hdr(skb) (skb->nh.iph)
+#define skb_network_offset(skb) (skb->nh.raw - skb->data)
+#define skb_network_header(skb) (skb->nh.raw)
+#define skb_tail_pointer(skb) skb->tail
+#define skb_reset_tail_pointer(skb) \
+ do { \
+ skb->tail = skb->data; \
+ } while (0)
+#define skb_copy_to_linear_data(skb, from, len) \
+ memcpy(skb->data, from, len)
+#define skb_copy_to_linear_data_offset(skb, offset, from, len) \
+ memcpy(skb->data + offset, from, len)
+#define skb_network_header_len(skb) (skb->h.raw - skb->nh.raw)
+#define pci_register_driver pci_module_init
+#define skb_mac_header(skb) skb->mac.raw
+
+#ifdef NETIF_F_MULTI_QUEUE
+#ifndef alloc_etherdev_mq
+#define alloc_etherdev_mq(_a, _b) alloc_etherdev(_a)
+#endif
+#endif /* NETIF_F_MULTI_QUEUE */
+
+#ifndef ETH_FCS_LEN
+#define ETH_FCS_LEN 4
+#endif
+#define cancel_work_sync(x) flush_scheduled_work()
+#ifndef udp_hdr
+#define udp_hdr _udp_hdr
+static inline struct udphdr *_udp_hdr(const struct sk_buff *skb)
+{
+ return (struct udphdr *)skb_transport_header(skb);
+}
+#endif
+
+#ifdef cpu_to_be16
+#undef cpu_to_be16
+#endif
+#define cpu_to_be16(x) __constant_htons(x)
+
+#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,1)))
+enum {
+ DUMP_PREFIX_NONE,
+ DUMP_PREFIX_ADDRESS,
+ DUMP_PREFIX_OFFSET
+};
+#endif /* !(RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,1)) */
+#ifndef hex_asc
+#define hex_asc(x) "0123456789abcdef"[x]
+#endif
+#include <linux/ctype.h>
+extern void _kc_print_hex_dump(const char *level, const char *prefix_str,
+ int prefix_type, int rowsize, int groupsize,
+ const void *buf, size_t len, bool ascii);
+#define print_hex_dump(lvl, s, t, r, g, b, l, a) \
+ _kc_print_hex_dump(lvl, s, t, r, g, b, l, a)
+#else /* 2.6.22 */
+#define ETH_TYPE_TRANS_SETS_DEV
+#define HAVE_NETDEV_STATS_IN_NETDEV
+#endif /* < 2.6.22 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,6,22) )
+#endif /* > 2.6.22 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) )
+#define netif_subqueue_stopped(_a, _b) 0
+#ifndef PTR_ALIGN
+#define PTR_ALIGN(p, a) ((typeof(p))ALIGN((unsigned long)(p), (a)))
+#endif
+
+#ifndef CONFIG_PM_SLEEP
+#define CONFIG_PM_SLEEP CONFIG_PM
+#endif
+
+#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,6,13) )
+#define HAVE_ETHTOOL_GET_PERM_ADDR
+#endif /* 2.6.14 through 2.6.22 */
+#endif /* < 2.6.23 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) )
+#ifndef ETH_FLAG_LRO
+#define ETH_FLAG_LRO NETIF_F_LRO
+#endif
+
+/* if GRO is supported then the napi struct must already exist */
+#ifndef NETIF_F_GRO
+/* NAPI API changes in 2.6.24 break everything */
+struct napi_struct {
+ /* used to look up the real NAPI polling routine */
+ int (*poll)(struct napi_struct *, int);
+ struct net_device *dev;
+ int weight;
+};
+#endif
+
+#ifdef NAPI
+extern int __kc_adapter_clean(struct net_device *, int *);
+extern struct net_device *napi_to_poll_dev(struct napi_struct *napi);
+#define netif_napi_add(_netdev, _napi, _poll, _weight) \
+ do { \
+ struct napi_struct *__napi = (_napi); \
+ struct net_device *poll_dev = napi_to_poll_dev(__napi); \
+ poll_dev->poll = &(__kc_adapter_clean); \
+ poll_dev->priv = (_napi); \
+ poll_dev->weight = (_weight); \
+ set_bit(__LINK_STATE_RX_SCHED, &poll_dev->state); \
+ set_bit(__LINK_STATE_START, &poll_dev->state);\
+ dev_hold(poll_dev); \
+ __napi->poll = &(_poll); \
+ __napi->weight = (_weight); \
+ __napi->dev = (_netdev); \
+ } while (0)
+#define netif_napi_del(_napi) \
+ do { \
+ struct net_device *poll_dev = napi_to_poll_dev(_napi); \
+ WARN_ON(!test_bit(__LINK_STATE_RX_SCHED, &poll_dev->state)); \
+ dev_put(poll_dev); \
+ memset(poll_dev, 0, sizeof(struct net_device));\
+ } while (0)
+#define napi_schedule_prep(_napi) \
+ (netif_running((_napi)->dev) && netif_rx_schedule_prep(napi_to_poll_dev(_napi)))
+#define napi_schedule(_napi) \
+ do { \
+ if (napi_schedule_prep(_napi)) \
+ __netif_rx_schedule(napi_to_poll_dev(_napi)); \
+ } while (0)
+#define napi_enable(_napi) netif_poll_enable(napi_to_poll_dev(_napi))
+#define napi_disable(_napi) netif_poll_disable(napi_to_poll_dev(_napi))
+#define __napi_schedule(_napi) __netif_rx_schedule(napi_to_poll_dev(_napi))
+#ifndef NETIF_F_GRO
+#define napi_complete(_napi) netif_rx_complete(napi_to_poll_dev(_napi))
+#else
+#define napi_complete(_napi) \
+ do { \
+ napi_gro_flush(_napi); \
+ netif_rx_complete(napi_to_poll_dev(_napi)); \
+ } while (0)
+#endif /* NETIF_F_GRO */
+#else /* NAPI */
+#define netif_napi_add(_netdev, _napi, _poll, _weight) \
+ do { \
+ struct napi_struct *__napi = _napi; \
+ _netdev->poll = &(_poll); \
+ _netdev->weight = (_weight); \
+ __napi->poll = &(_poll); \
+ __napi->weight = (_weight); \
+ __napi->dev = (_netdev); \
+ } while (0)
+#define netif_napi_del(_a) do {} while (0)
+#endif /* NAPI */
+
+#undef dev_get_by_name
+#define dev_get_by_name(_a, _b) dev_get_by_name(_b)
+#define __netif_subqueue_stopped(_a, _b) netif_subqueue_stopped(_a, _b)
+#ifndef DMA_BIT_MASK
+#define DMA_BIT_MASK(n) (((n) == 64) ? DMA_64BIT_MASK : ((1ULL<<(n))-1))
+#endif
+
+#ifdef NETIF_F_TSO6
+#define skb_is_gso_v6 _kc_skb_is_gso_v6
+static inline int _kc_skb_is_gso_v6(const struct sk_buff *skb)
+{
+ return skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6;
+}
+#endif /* NETIF_F_TSO6 */
+
+#ifndef KERN_CONT
+#define KERN_CONT ""
+#endif
+#else /* < 2.6.24 */
+#define HAVE_ETHTOOL_GET_SSET_COUNT
+#define HAVE_NETDEV_NAPI_LIST
+#endif /* < 2.6.24 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,6,24) )
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,2,0) )
+#include <linux/pm_qos_params.h>
+#else /* >= 3.2.0 */
+#include <linux/pm_qos.h>
+#endif /* else >= 3.2.0 */
+#endif /* > 2.6.24 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) )
+#define PM_QOS_CPU_DMA_LATENCY 1
+
+#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18) )
+#include <linux/latency.h>
+#define PM_QOS_DEFAULT_VALUE INFINITE_LATENCY
+#define pm_qos_add_requirement(pm_qos_class, name, value) \
+ set_acceptable_latency(name, value)
+#define pm_qos_remove_requirement(pm_qos_class, name) \
+ remove_acceptable_latency(name)
+#define pm_qos_update_requirement(pm_qos_class, name, value) \
+ modify_acceptable_latency(name, value)
+#else
+#define PM_QOS_DEFAULT_VALUE -1
+#define pm_qos_add_requirement(pm_qos_class, name, value)
+#define pm_qos_remove_requirement(pm_qos_class, name)
+#define pm_qos_update_requirement(pm_qos_class, name, value) { \
+ if (value != PM_QOS_DEFAULT_VALUE) { \
+ printk(KERN_WARNING "%s: unable to set PM QoS requirement\n", \
+ pci_name(adapter->pdev)); \
+ } \
+}
+
+#endif /* > 2.6.18 */
+
+#define pci_enable_device_mem(pdev) pci_enable_device(pdev)
+
+#ifndef DEFINE_PCI_DEVICE_TABLE
+#define DEFINE_PCI_DEVICE_TABLE(_table) struct pci_device_id _table[]
+#endif /* DEFINE_PCI_DEVICE_TABLE */
+
+#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) )
+#ifndef IXGBE_PROCFS
+#define IXGBE_PROCFS
+#endif /* IXGBE_PROCFS */
+#endif /* >= 2.6.0 */
+
+
+#else /* < 2.6.25 */
+
+#ifndef IXGBE_SYSFS
+#define IXGBE_SYSFS
+#endif /* IXGBE_SYSFS */
+
+
+#endif /* < 2.6.25 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26) )
+#ifndef clamp_t
+#define clamp_t(type, val, min, max) ({ \
+ type __val = (val); \
+ type __min = (min); \
+ type __max = (max); \
+ __val = __val < __min ? __min : __val; \
+ __val > __max ? __max : __val; })
+#endif /* clamp_t */
+#ifdef NETIF_F_TSO
+#ifdef NETIF_F_TSO6
+#define netif_set_gso_max_size(_netdev, size) \
+ do { \
+ if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) { \
+ _netdev->features &= ~NETIF_F_TSO; \
+ _netdev->features &= ~NETIF_F_TSO6; \
+ } else { \
+ _netdev->features |= NETIF_F_TSO; \
+ _netdev->features |= NETIF_F_TSO6; \
+ } \
+ } while (0)
+#else /* NETIF_F_TSO6 */
+#define netif_set_gso_max_size(_netdev, size) \
+ do { \
+ if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) \
+ _netdev->features &= ~NETIF_F_TSO; \
+ else \
+ _netdev->features |= NETIF_F_TSO; \
+ } while (0)
+#endif /* NETIF_F_TSO6 */
+#else
+#define netif_set_gso_max_size(_netdev, size) do {} while (0)
+#endif /* NETIF_F_TSO */
+#undef kzalloc_node
+#define kzalloc_node(_size, _flags, _node) kzalloc(_size, _flags)
+
+extern void _kc_pci_disable_link_state(struct pci_dev *dev, int state);
+#define pci_disable_link_state(p, s) _kc_pci_disable_link_state(p, s)
+#else /* < 2.6.26 */
+#include <linux/pci-aspm.h>
+#define HAVE_NETDEV_VLAN_FEATURES
+#endif /* < 2.6.26 */
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27) )
+static inline void _kc_ethtool_cmd_speed_set(struct ethtool_cmd *ep,
+ __u32 speed)
+{
+ ep->speed = (__u16)speed;
+ /* ep->speed_hi = (__u16)(speed >> 16); */
+}
+#define ethtool_cmd_speed_set _kc_ethtool_cmd_speed_set
+
+static inline __u32 _kc_ethtool_cmd_speed(struct ethtool_cmd *ep)
+{
+ /* no speed_hi before 2.6.27, and probably no need for it yet */
+ return (__u32)ep->speed;
+}
+#define ethtool_cmd_speed _kc_ethtool_cmd_speed
+
+#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,15) )
+#if ((LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)) && defined(CONFIG_PM))
+#define ANCIENT_PM 1
+#elif ((LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23)) && \
+ (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26)) && \
+ defined(CONFIG_PM_SLEEP))
+#define NEWER_PM 1
+#endif
+#if defined(ANCIENT_PM) || defined(NEWER_PM)
+#undef device_set_wakeup_enable
+#define device_set_wakeup_enable(dev, val) \
+ do { \
+ u16 pmc = 0; \
+ int pm = pci_find_capability(adapter->pdev, PCI_CAP_ID_PM); \
+ if (pm) { \
+ pci_read_config_word(adapter->pdev, pm + PCI_PM_PMC, \
+ &pmc); \
+ } \
+ (dev)->power.can_wakeup = !!(pmc >> 11); \
+ (dev)->power.should_wakeup = (val && (pmc >> 11)); \
+ } while (0)
+#endif /* 2.6.15-2.6.22 and CONFIG_PM or 2.6.23-2.6.25 and CONFIG_PM_SLEEP */
+#endif /* 2.6.15 through 2.6.27 */
+#ifndef netif_napi_del
+#define netif_napi_del(_a) do {} while (0)
+#ifdef NAPI
+#ifdef CONFIG_NETPOLL
+#undef netif_napi_del
+#define netif_napi_del(_a) list_del(&(_a)->dev_list);
+#endif
+#endif
+#endif /* netif_napi_del */
+#ifdef dma_mapping_error
+#undef dma_mapping_error
+#endif
+#define dma_mapping_error(dev, dma_addr) pci_dma_mapping_error(dma_addr)
+
+#ifdef CONFIG_NETDEVICES_MULTIQUEUE
+#define HAVE_TX_MQ
+#endif
+
+#ifdef HAVE_TX_MQ
+extern void _kc_netif_tx_stop_all_queues(struct net_device *);
+extern void _kc_netif_tx_wake_all_queues(struct net_device *);
+extern void _kc_netif_tx_start_all_queues(struct net_device *);
+#define netif_tx_stop_all_queues(a) _kc_netif_tx_stop_all_queues(a)
+#define netif_tx_wake_all_queues(a) _kc_netif_tx_wake_all_queues(a)
+#define netif_tx_start_all_queues(a) _kc_netif_tx_start_all_queues(a)
+#undef netif_stop_subqueue
+#define netif_stop_subqueue(_ndev,_qi) do { \
+ if (netif_is_multiqueue((_ndev))) \
+ netif_stop_subqueue((_ndev), (_qi)); \
+ else \
+ netif_stop_queue((_ndev)); \
+ } while (0)
+#undef netif_start_subqueue
+#define netif_start_subqueue(_ndev,_qi) do { \
+ if (netif_is_multiqueue((_ndev))) \
+ netif_start_subqueue((_ndev), (_qi)); \
+ else \
+ netif_start_queue((_ndev)); \
+ } while (0)
+#else /* HAVE_TX_MQ */
+#define netif_tx_stop_all_queues(a) netif_stop_queue(a)
+#define netif_tx_wake_all_queues(a) netif_wake_queue(a)
+#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,12) )
+#define netif_tx_start_all_queues(a) netif_start_queue(a)
+#else
+#define netif_tx_start_all_queues(a) do {} while (0)
+#endif
+#define netif_stop_subqueue(_ndev,_qi) netif_stop_queue((_ndev))
+#define netif_start_subqueue(_ndev,_qi) netif_start_queue((_ndev))
+#endif /* HAVE_TX_MQ */
+#ifndef NETIF_F_MULTI_QUEUE
+#define NETIF_F_MULTI_QUEUE 0
+#define netif_is_multiqueue(a) 0
+#define netif_wake_subqueue(a, b)
+#endif /* NETIF_F_MULTI_QUEUE */
+
+#ifndef __WARN_printf
+extern void __kc_warn_slowpath(const char *file, const int line,
+ const char *fmt, ...) __attribute__((format(printf, 3, 4)));
+#define __WARN_printf(arg...) __kc_warn_slowpath(__FILE__, __LINE__, arg)
+#endif /* __WARN_printf */
+
+#ifndef WARN
+#define WARN(condition, format...) ({ \
+ int __ret_warn_on = !!(condition); \
+ if (unlikely(__ret_warn_on)) \
+ __WARN_printf(format); \
+ unlikely(__ret_warn_on); \
+})
+#endif /* WARN */
+#else /* < 2.6.27 */
+#define HAVE_TX_MQ
+#define HAVE_NETDEV_SELECT_QUEUE
+#endif /* < 2.6.27 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) )
+#define pci_ioremap_bar(pdev, bar) ioremap(pci_resource_start(pdev, bar), \
+ pci_resource_len(pdev, bar))
+#define pci_wake_from_d3 _kc_pci_wake_from_d3
+#define pci_prepare_to_sleep _kc_pci_prepare_to_sleep
+extern int _kc_pci_wake_from_d3(struct pci_dev *dev, bool enable);
+extern int _kc_pci_prepare_to_sleep(struct pci_dev *dev);
+#define netdev_alloc_page(a) alloc_page(GFP_ATOMIC)
+#ifndef __skb_queue_head_init
+static inline void __kc_skb_queue_head_init(struct sk_buff_head *list)
+{
+ list->prev = list->next = (struct sk_buff *)list;
+ list->qlen = 0;
+}
+#define __skb_queue_head_init(_q) __kc_skb_queue_head_init(_q)
+#endif
+#endif /* < 2.6.28 */
+
+#ifndef skb_add_rx_frag
+#define skb_add_rx_frag _kc_skb_add_rx_frag
+extern void _kc_skb_add_rx_frag(struct sk_buff *, int, struct page *, int, int);
+#endif
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,29) )
+#ifndef swap
+#define swap(a, b) \
+ do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
+#endif
+#define pci_request_selected_regions_exclusive(pdev, bars, name) \
+ pci_request_selected_regions(pdev, bars, name)
+#ifndef CONFIG_NR_CPUS
+#define CONFIG_NR_CPUS 1
+#endif /* CONFIG_NR_CPUS */
+#ifndef pcie_aspm_enabled
+#define pcie_aspm_enabled() (1)
+#endif /* pcie_aspm_enabled */
+#else /* < 2.6.29 */
+#ifndef HAVE_NET_DEVICE_OPS
+#define HAVE_NET_DEVICE_OPS
+#endif
+#ifdef CONFIG_DCB
+#define HAVE_PFC_MODE_ENABLE
+#endif /* CONFIG_DCB */
+#endif /* < 2.6.29 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,30) )
+#define skb_rx_queue_recorded(a) false
+#define skb_get_rx_queue(a) 0
+#undef CONFIG_FCOE
+#undef CONFIG_FCOE_MODULE
+extern u16 _kc_skb_tx_hash(struct net_device *dev, struct sk_buff *skb);
+#define skb_tx_hash(n, s) _kc_skb_tx_hash(n, s)
+#define skb_record_rx_queue(a, b) do {} while (0)
+#ifndef CONFIG_PCI_IOV
+#undef pci_enable_sriov
+#define pci_enable_sriov(a, b) -ENOTSUPP
+#undef pci_disable_sriov
+#define pci_disable_sriov(a) do {} while (0)
+#endif /* CONFIG_PCI_IOV */
+#ifndef pr_cont
+#define pr_cont(fmt, ...) \
+ printk(KERN_CONT fmt, ##__VA_ARGS__)
+#endif /* pr_cont */
+#else
+#define HAVE_ASPM_QUIRKS
+#endif /* < 2.6.30 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,31) )
+#define ETH_P_1588 0x88F7
+#define ETH_P_FIP 0x8914
+#ifndef netdev_uc_count
+#define netdev_uc_count(dev) ((dev)->uc_count)
+#endif
+#ifndef netdev_for_each_uc_addr
+#define netdev_for_each_uc_addr(uclist, dev) \
+ for (uclist = dev->uc_list; uclist; uclist = uclist->next)
+#endif
+#else
+#ifndef HAVE_NETDEV_STORAGE_ADDRESS
+#define HAVE_NETDEV_STORAGE_ADDRESS
+#endif
+#ifndef HAVE_NETDEV_HW_ADDR
+#define HAVE_NETDEV_HW_ADDR
+#endif
+#ifndef HAVE_TRANS_START_IN_QUEUE
+#define HAVE_TRANS_START_IN_QUEUE
+#endif
+#endif /* < 2.6.31 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,32) )
+#undef netdev_tx_t
+#define netdev_tx_t int
+#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
+#ifndef NETIF_F_FCOE_MTU
+#define NETIF_F_FCOE_MTU (1 << 26)
+#endif
+#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */
+
+#ifndef pm_runtime_get_sync
+#define pm_runtime_get_sync(dev) do {} while (0)
+#endif
+#ifndef pm_runtime_put
+#define pm_runtime_put(dev) do {} while (0)
+#endif
+#ifndef pm_runtime_put_sync
+#define pm_runtime_put_sync(dev) do {} while (0)
+#endif
+#ifndef pm_runtime_resume
+#define pm_runtime_resume(dev) do {} while (0)
+#endif
+#ifndef pm_schedule_suspend
+#define pm_schedule_suspend(dev, t) do {} while (0)
+#endif
+#ifndef pm_runtime_set_suspended
+#define pm_runtime_set_suspended(dev) do {} while (0)
+#endif
+#ifndef pm_runtime_disable
+#define pm_runtime_disable(dev) do {} while (0)
+#endif
+#ifndef pm_runtime_put_noidle
+#define pm_runtime_put_noidle(dev) do {} while (0)
+#endif
+#ifndef pm_runtime_set_active
+#define pm_runtime_set_active(dev) do {} while (0)
+#endif
+#ifndef pm_runtime_enable
+#define pm_runtime_enable(dev) do {} while (0)
+#endif
+#ifndef pm_runtime_get_noresume
+#define pm_runtime_get_noresume(dev) do {} while (0)
+#endif
+#else /* < 2.6.32 */
+#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
+#ifndef HAVE_NETDEV_OPS_FCOE_ENABLE
+#define HAVE_NETDEV_OPS_FCOE_ENABLE
+#endif
+#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */
+#ifdef CONFIG_DCB
+#ifndef HAVE_DCBNL_OPS_GETAPP
+#define HAVE_DCBNL_OPS_GETAPP
+#endif
+#endif /* CONFIG_DCB */
+#include <linux/pm_runtime.h>
+/* IOV bad DMA target work arounds require at least this kernel rev support */
+#define HAVE_PCIE_TYPE
+#endif /* < 2.6.32 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,33) )
+#ifndef pci_pcie_cap
+#define pci_pcie_cap(pdev) pci_find_capability(pdev, PCI_CAP_ID_EXP)
+#endif
+#ifndef IPV4_FLOW
+#define IPV4_FLOW 0x10
+#endif /* IPV4_FLOW */
+#ifndef IPV6_FLOW
+#define IPV6_FLOW 0x11
+#endif /* IPV6_FLOW */
+/* Features back-ported to RHEL6 or SLES11 SP1 after 2.6.32 */
+#if ( (RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,0)) || \
+ (SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,1,0)) )
+#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
+#ifndef HAVE_NETDEV_OPS_FCOE_GETWWN
+#define HAVE_NETDEV_OPS_FCOE_GETWWN
+#endif
+#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */
+#endif /* RHEL6 or SLES11 SP1 */
+#ifndef __percpu
+#define __percpu
+#endif /* __percpu */
+#else /* < 2.6.33 */
+#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
+#ifndef HAVE_NETDEV_OPS_FCOE_GETWWN
+#define HAVE_NETDEV_OPS_FCOE_GETWWN
+#endif
+#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */
+#define HAVE_ETHTOOL_SFP_DISPLAY_PORT
+#endif /* < 2.6.33 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,34) )
+#ifndef ETH_FLAG_NTUPLE
+#define ETH_FLAG_NTUPLE NETIF_F_NTUPLE
+#endif
+
+#ifndef netdev_mc_count
+#define netdev_mc_count(dev) ((dev)->mc_count)
+#endif
+#ifndef netdev_mc_empty
+#define netdev_mc_empty(dev) (netdev_mc_count(dev) == 0)
+#endif
+#ifndef netdev_for_each_mc_addr
+#define netdev_for_each_mc_addr(mclist, dev) \
+ for (mclist = dev->mc_list; mclist; mclist = mclist->next)
+#endif
+#ifndef netdev_uc_count
+#define netdev_uc_count(dev) ((dev)->uc.count)
+#endif
+#ifndef netdev_uc_empty
+#define netdev_uc_empty(dev) (netdev_uc_count(dev) == 0)
+#endif
+#ifndef netdev_for_each_uc_addr
+#define netdev_for_each_uc_addr(ha, dev) \
+ list_for_each_entry(ha, &dev->uc.list, list)
+#endif
+#ifndef dma_set_coherent_mask
+#define dma_set_coherent_mask(dev,mask) \
+ pci_set_consistent_dma_mask(to_pci_dev(dev),(mask))
+#endif
+#ifndef pci_dev_run_wake
+#define pci_dev_run_wake(pdev) (0)
+#endif
+
+/* netdev logging taken from include/linux/netdevice.h */
+#ifndef netdev_name
+static inline const char *_kc_netdev_name(const struct net_device *dev)
+{
+ if (dev->reg_state != NETREG_REGISTERED)
+ return "(unregistered net_device)";
+ return dev->name;
+}
+#define netdev_name(netdev) _kc_netdev_name(netdev)
+#endif /* netdev_name */
+
+#undef netdev_printk
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) )
+#define netdev_printk(level, netdev, format, args...) \
+do { \
+ struct adapter_struct *kc_adapter = netdev_priv(netdev);\
+ struct pci_dev *pdev = kc_adapter->pdev; \
+ printk("%s %s: " format, level, pci_name(pdev), \
+ ##args); \
+} while(0)
+#elif ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21) )
+#define netdev_printk(level, netdev, format, args...) \
+do { \
+ struct adapter_struct *kc_adapter = netdev_priv(netdev);\
+ struct pci_dev *pdev = kc_adapter->pdev; \
+ struct device *dev = pci_dev_to_dev(pdev); \
+ dev_printk(level, dev, "%s: " format, \
+ netdev_name(netdev), ##args); \
+} while(0)
+#else /* 2.6.21 => 2.6.34 */
+#define netdev_printk(level, netdev, format, args...) \
+ dev_printk(level, (netdev)->dev.parent, \
+ "%s: " format, \
+ netdev_name(netdev), ##args)
+#endif /* <2.6.0 <2.6.21 <2.6.34 */
+#undef netdev_emerg
+#define netdev_emerg(dev, format, args...) \
+ netdev_printk(KERN_EMERG, dev, format, ##args)
+#undef netdev_alert
+#define netdev_alert(dev, format, args...) \
+ netdev_printk(KERN_ALERT, dev, format, ##args)
+#undef netdev_crit
+#define netdev_crit(dev, format, args...) \
+ netdev_printk(KERN_CRIT, dev, format, ##args)
+#undef netdev_err
+#define netdev_err(dev, format, args...) \
+ netdev_printk(KERN_ERR, dev, format, ##args)
+#undef netdev_warn
+#define netdev_warn(dev, format, args...) \
+ netdev_printk(KERN_WARNING, dev, format, ##args)
+#undef netdev_notice
+#define netdev_notice(dev, format, args...) \
+ netdev_printk(KERN_NOTICE, dev, format, ##args)
+#undef netdev_info
+#define netdev_info(dev, format, args...) \
+ netdev_printk(KERN_INFO, dev, format, ##args)
+#undef netdev_dbg
+#if defined(DEBUG)
+#define netdev_dbg(__dev, format, args...) \
+ netdev_printk(KERN_DEBUG, __dev, format, ##args)
+#elif defined(CONFIG_DYNAMIC_DEBUG)
+#define netdev_dbg(__dev, format, args...) \
+do { \
+ dynamic_dev_dbg((__dev)->dev.parent, "%s: " format, \
+ netdev_name(__dev), ##args); \
+} while (0)
+#else /* DEBUG */
+#define netdev_dbg(__dev, format, args...) \
+({ \
+ if (0) \
+ netdev_printk(KERN_DEBUG, __dev, format, ##args); \
+ 0; \
+})
+#endif /* DEBUG */
+
+#undef netif_printk
+#define netif_printk(priv, type, level, dev, fmt, args...) \
+do { \
+ if (netif_msg_##type(priv)) \
+ netdev_printk(level, (dev), fmt, ##args); \
+} while (0)
+
+#undef netif_emerg
+#define netif_emerg(priv, type, dev, fmt, args...) \
+ netif_level(emerg, priv, type, dev, fmt, ##args)
+#undef netif_alert
+#define netif_alert(priv, type, dev, fmt, args...) \
+ netif_level(alert, priv, type, dev, fmt, ##args)
+#undef netif_crit
+#define netif_crit(priv, type, dev, fmt, args...) \
+ netif_level(crit, priv, type, dev, fmt, ##args)
+#undef netif_err
+#define netif_err(priv, type, dev, fmt, args...) \
+ netif_level(err, priv, type, dev, fmt, ##args)
+#undef netif_warn
+#define netif_warn(priv, type, dev, fmt, args...) \
+ netif_level(warn, priv, type, dev, fmt, ##args)
+#undef netif_notice
+#define netif_notice(priv, type, dev, fmt, args...) \
+ netif_level(notice, priv, type, dev, fmt, ##args)
+#undef netif_info
+#define netif_info(priv, type, dev, fmt, args...) \
+ netif_level(info, priv, type, dev, fmt, ##args)
+
+#ifdef SET_SYSTEM_SLEEP_PM_OPS
+#define HAVE_SYSTEM_SLEEP_PM_OPS
+#endif
+
+#ifndef for_each_set_bit
+#define for_each_set_bit(bit, addr, size) \
+ for ((bit) = find_first_bit((addr), (size)); \
+ (bit) < (size); \
+ (bit) = find_next_bit((addr), (size), (bit) + 1))
+#endif /* for_each_set_bit */
+
+#ifndef DEFINE_DMA_UNMAP_ADDR
+#define DEFINE_DMA_UNMAP_ADDR DECLARE_PCI_UNMAP_ADDR
+#define DEFINE_DMA_UNMAP_LEN DECLARE_PCI_UNMAP_LEN
+#define dma_unmap_addr pci_unmap_addr
+#define dma_unmap_addr_set pci_unmap_addr_set
+#define dma_unmap_len pci_unmap_len
+#define dma_unmap_len_set pci_unmap_len_set
+#endif /* DEFINE_DMA_UNMAP_ADDR */
+#else /* < 2.6.34 */
+#define HAVE_SYSTEM_SLEEP_PM_OPS
+#ifndef HAVE_SET_RX_MODE
+#define HAVE_SET_RX_MODE
+#endif
+
+#endif /* < 2.6.34 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35) )
+#ifndef numa_node_id
+#define numa_node_id() 0
+#endif
+#ifdef HAVE_TX_MQ
+#include <net/sch_generic.h>
+#ifndef CONFIG_NETDEVICES_MULTIQUEUE
+void _kc_netif_set_real_num_tx_queues(struct net_device *, unsigned int);
+#define netif_set_real_num_tx_queues _kc_netif_set_real_num_tx_queues
+#else /* CONFIG_NETDEVICES_MULTI_QUEUE */
+#define netif_set_real_num_tx_queues(_netdev, _count) \
+ do { \
+ (_netdev)->egress_subqueue_count = _count; \
+ } while (0)
+#endif /* CONFIG_NETDEVICES_MULTI_QUEUE */
+#else
+#define netif_set_real_num_tx_queues(_netdev, _count) do {} while(0)
+#endif /* HAVE_TX_MQ */
+#ifndef ETH_FLAG_RXHASH
+#define ETH_FLAG_RXHASH (1<<28)
+#endif /* ETH_FLAG_RXHASH */
+#else /* < 2.6.35 */
+#define HAVE_PM_QOS_REQUEST_LIST
+#define HAVE_IRQ_AFFINITY_HINT
+#endif /* < 2.6.35 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36) )
+extern int _kc_ethtool_op_set_flags(struct net_device *, u32, u32);
+#define ethtool_op_set_flags _kc_ethtool_op_set_flags
+extern u32 _kc_ethtool_op_get_flags(struct net_device *);
+#define ethtool_op_get_flags _kc_ethtool_op_get_flags
+
+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+#ifdef NET_IP_ALIGN
+#undef NET_IP_ALIGN
+#endif
+#define NET_IP_ALIGN 0
+#endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
+
+#ifdef NET_SKB_PAD
+#undef NET_SKB_PAD
+#endif
+
+#if (L1_CACHE_BYTES > 32)
+#define NET_SKB_PAD L1_CACHE_BYTES
+#else
+#define NET_SKB_PAD 32
+#endif
+
+static inline struct sk_buff *_kc_netdev_alloc_skb_ip_align(struct net_device *dev,
+ unsigned int length)
+{
+ struct sk_buff *skb;
+
+ skb = alloc_skb(length + NET_SKB_PAD + NET_IP_ALIGN, GFP_ATOMIC);
+ if (skb) {
+#if (NET_IP_ALIGN + NET_SKB_PAD)
+ skb_reserve(skb, NET_IP_ALIGN + NET_SKB_PAD);
+#endif
+ skb->dev = dev;
+ }
+ return skb;
+}
+
+#ifdef netdev_alloc_skb_ip_align
+#undef netdev_alloc_skb_ip_align
+#endif
+#define netdev_alloc_skb_ip_align(n, l) _kc_netdev_alloc_skb_ip_align(n, l)
+
+#undef netif_level
+#define netif_level(level, priv, type, dev, fmt, args...) \
+do { \
+ if (netif_msg_##type(priv)) \
+ netdev_##level(dev, fmt, ##args); \
+} while (0)
+
+#undef usleep_range
+#define usleep_range(min, max) msleep(DIV_ROUND_UP(min, 1000))
+
+#else /* < 2.6.36 */
+#define HAVE_PM_QOS_REQUEST_ACTIVE
+#define HAVE_8021P_SUPPORT
+#define HAVE_NDO_GET_STATS64
+#endif /* < 2.6.36 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37) )
+#ifndef ETHTOOL_RXNTUPLE_ACTION_CLEAR
+#define ETHTOOL_RXNTUPLE_ACTION_CLEAR (-2)
+#endif
+#ifndef VLAN_N_VID
+#define VLAN_N_VID VLAN_GROUP_ARRAY_LEN
+#endif /* VLAN_N_VID */
+#ifndef ETH_FLAG_TXVLAN
+#define ETH_FLAG_TXVLAN (1 << 7)
+#endif /* ETH_FLAG_TXVLAN */
+#ifndef ETH_FLAG_RXVLAN
+#define ETH_FLAG_RXVLAN (1 << 8)
+#endif /* ETH_FLAG_RXVLAN */
+
+static inline void _kc_skb_checksum_none_assert(struct sk_buff *skb)
+{
+ WARN_ON(skb->ip_summed != CHECKSUM_NONE);
+}
+#define skb_checksum_none_assert(skb) _kc_skb_checksum_none_assert(skb)
+
+static inline void *_kc_vzalloc_node(unsigned long size, int node)
+{
+ void *addr = vmalloc_node(size, node);
+ if (addr)
+ memset(addr, 0, size);
+ return addr;
+}
+#define vzalloc_node(_size, _node) _kc_vzalloc_node(_size, _node)
+
+static inline void *_kc_vzalloc(unsigned long size)
+{
+ void *addr = vmalloc(size);
+ if (addr)
+ memset(addr, 0, size);
+ return addr;
+}
+#define vzalloc(_size) _kc_vzalloc(_size)
+
+#ifndef vlan_get_protocol
+static inline __be16 __kc_vlan_get_protocol(const struct sk_buff *skb)
+{
+ if (vlan_tx_tag_present(skb) ||
+ skb->protocol != cpu_to_be16(ETH_P_8021Q))
+ return skb->protocol;
+
+ if (skb_headlen(skb) < sizeof(struct vlan_ethhdr))
+ return 0;
+
+ return ((struct vlan_ethhdr*)skb->data)->h_vlan_encapsulated_proto;
+}
+#define vlan_get_protocol(_skb) __kc_vlan_get_protocol(_skb)
+#endif
+#ifdef HAVE_HW_TIME_STAMP
+#define SKBTX_HW_TSTAMP (1 << 0)
+#define SKBTX_IN_PROGRESS (1 << 2)
+#define SKB_SHARED_TX_IS_UNION
+#endif
+#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,4,18) )
+#ifndef HAVE_VLAN_RX_REGISTER
+#define HAVE_VLAN_RX_REGISTER
+#endif
+#endif /* > 2.4.18 */
+#endif /* < 2.6.37 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,38) )
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) )
+#define skb_checksum_start_offset(skb) skb_transport_offset(skb)
+#else /* 2.6.22 -> 2.6.37 */
+static inline int _kc_skb_checksum_start_offset(const struct sk_buff *skb)
+{
+ return skb->csum_start - skb_headroom(skb);
+}
+#define skb_checksum_start_offset(skb) _kc_skb_checksum_start_offset(skb)
+#endif /* 2.6.22 -> 2.6.37 */
+#ifdef CONFIG_DCB
+#ifndef IEEE_8021QAZ_MAX_TCS
+#define IEEE_8021QAZ_MAX_TCS 8
+#endif
+#ifndef DCB_CAP_DCBX_HOST
+#define DCB_CAP_DCBX_HOST 0x01
+#endif
+#ifndef DCB_CAP_DCBX_LLD_MANAGED
+#define DCB_CAP_DCBX_LLD_MANAGED 0x02
+#endif
+#ifndef DCB_CAP_DCBX_VER_CEE
+#define DCB_CAP_DCBX_VER_CEE 0x04
+#endif
+#ifndef DCB_CAP_DCBX_VER_IEEE
+#define DCB_CAP_DCBX_VER_IEEE 0x08
+#endif
+#ifndef DCB_CAP_DCBX_STATIC
+#define DCB_CAP_DCBX_STATIC 0x10
+#endif
+#endif /* CONFIG_DCB */
+#else /* < 2.6.38 */
+#endif /* < 2.6.38 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39) )
+#ifndef skb_queue_reverse_walk_safe
+#define skb_queue_reverse_walk_safe(queue, skb, tmp) \
+ for (skb = (queue)->prev, tmp = skb->prev; \
+ skb != (struct sk_buff *)(queue); \
+ skb = tmp, tmp = skb->prev)
+#endif
+#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(6,0)))
+extern u8 _kc_netdev_get_num_tc(struct net_device *dev);
+#define netdev_get_num_tc(dev) _kc_netdev_get_num_tc(dev)
+extern u8 _kc_netdev_get_prio_tc_map(struct net_device *dev, u8 up);
+#define netdev_get_prio_tc_map(dev, up) _kc_netdev_get_prio_tc_map(dev, up)
+#define netdev_set_prio_tc_map(dev, up, tc) do {} while (0)
+#else /* RHEL6.1 or greater */
+#ifndef HAVE_MQPRIO
+#define HAVE_MQPRIO
+#endif /* HAVE_MQPRIO */
+#ifdef CONFIG_DCB
+#ifndef HAVE_DCBNL_IEEE
+#define HAVE_DCBNL_IEEE
+#ifndef IEEE_8021QAZ_TSA_STRICT
+#define IEEE_8021QAZ_TSA_STRICT 0
+#endif
+#ifndef IEEE_8021QAZ_TSA_ETS
+#define IEEE_8021QAZ_TSA_ETS 2
+#endif
+#ifndef IEEE_8021QAZ_APP_SEL_ETHERTYPE
+#define IEEE_8021QAZ_APP_SEL_ETHERTYPE 1
+#endif
+#endif
+#endif /* CONFIG_DCB */
+#endif /* !(RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(6,0)) */
+#else /* < 2.6.39 */
+#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
+#ifndef HAVE_NETDEV_OPS_FCOE_DDP_TARGET
+#define HAVE_NETDEV_OPS_FCOE_DDP_TARGET
+#endif
+#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */
+#ifndef HAVE_MQPRIO
+#define HAVE_MQPRIO
+#endif
+#ifndef HAVE_SETUP_TC
+#define HAVE_SETUP_TC
+#endif
+#ifdef CONFIG_DCB
+#ifndef HAVE_DCBNL_IEEE
+#define HAVE_DCBNL_IEEE
+#endif
+#endif /* CONFIG_DCB */
+#ifndef HAVE_NDO_SET_FEATURES
+#define HAVE_NDO_SET_FEATURES
+#endif
+#endif /* < 2.6.39 */
+
+/*****************************************************************************/
+/* use < 2.6.40 because of a Fedora 15 kernel update where they
+ * updated the kernel version to 2.6.40.x and they back-ported 3.0 features
+ * like set_phys_id for ethtool.
+ */
+#undef ETHTOOL_GRXRINGS
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,40) )
+#ifdef ETHTOOL_GRXRINGS
+#ifndef FLOW_EXT
+#define FLOW_EXT 0x80000000
+union _kc_ethtool_flow_union {
+ struct ethtool_tcpip4_spec tcp_ip4_spec;
+ struct ethtool_usrip4_spec usr_ip4_spec;
+ __u8 hdata[60];
+};
+struct _kc_ethtool_flow_ext {
+ __be16 vlan_etype;
+ __be16 vlan_tci;
+ __be32 data[2];
+};
+struct _kc_ethtool_rx_flow_spec {
+ __u32 flow_type;
+ union _kc_ethtool_flow_union h_u;
+ struct _kc_ethtool_flow_ext h_ext;
+ union _kc_ethtool_flow_union m_u;
+ struct _kc_ethtool_flow_ext m_ext;
+ __u64 ring_cookie;
+ __u32 location;
+};
+#define ethtool_rx_flow_spec _kc_ethtool_rx_flow_spec
+#endif /* FLOW_EXT */
+#endif
+
+#define pci_disable_link_state_locked pci_disable_link_state
+
+#ifndef PCI_LTR_VALUE_MASK
+#define PCI_LTR_VALUE_MASK 0x000003ff
+#endif
+#ifndef PCI_LTR_SCALE_MASK
+#define PCI_LTR_SCALE_MASK 0x00001c00
+#endif
+#ifndef PCI_LTR_SCALE_SHIFT
+#define PCI_LTR_SCALE_SHIFT 10
+#endif
+
+#else /* < 2.6.40 */
+#define HAVE_ETHTOOL_SET_PHYS_ID
+#endif /* < 2.6.40 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,1,0) )
+#ifndef __netdev_alloc_skb_ip_align
+#define __netdev_alloc_skb_ip_align(d,l,_g) netdev_alloc_skb_ip_align(d,l)
+#endif /* __netdev_alloc_skb_ip_align */
+#define dcb_ieee_setapp(dev, app) dcb_setapp(dev, app)
+#define dcb_ieee_delapp(dev, app) 0
+#define dcb_ieee_getapp_mask(dev, app) (1 << app->priority)
+#else /* < 3.1.0 */
+#ifndef HAVE_DCBNL_IEEE_DELAPP
+#define HAVE_DCBNL_IEEE_DELAPP
+#endif
+#endif /* < 3.1.0 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,2,0) )
+#ifdef ETHTOOL_GRXRINGS
+#define HAVE_ETHTOOL_GET_RXNFC_VOID_RULE_LOCS
+#endif /* ETHTOOL_GRXRINGS */
+
+#ifndef skb_frag_size
+#define skb_frag_size(frag) _kc_skb_frag_size(frag)
+static inline unsigned int _kc_skb_frag_size(const skb_frag_t *frag)
+{
+ return frag->size;
+}
+#endif /* skb_frag_size */
+
+#ifndef skb_frag_size_sub
+#define skb_frag_size_sub(frag, delta) _kc_skb_frag_size_sub(frag, delta)
+static inline void _kc_skb_frag_size_sub(skb_frag_t *frag, int delta)
+{
+ frag->size -= delta;
+}
+#endif /* skb_frag_size_sub */
+
+#ifndef skb_frag_page
+#define skb_frag_page(frag) _kc_skb_frag_page(frag)
+static inline struct page *_kc_skb_frag_page(const skb_frag_t *frag)
+{
+ return frag->page;
+}
+#endif /* skb_frag_page */
+
+#ifndef skb_frag_address
+#define skb_frag_address(frag) _kc_skb_frag_address(frag)
+static inline void *_kc_skb_frag_address(const skb_frag_t *frag)
+{
+ return page_address(skb_frag_page(frag)) + frag->page_offset;
+}
+#endif /* skb_frag_address */
+
+#ifndef skb_frag_dma_map
+#define skb_frag_dma_map(dev,frag,offset,size,dir) \
+ _kc_skb_frag_dma_map(dev,frag,offset,size,dir)
+static inline dma_addr_t _kc_skb_frag_dma_map(struct device *dev,
+ const skb_frag_t *frag,
+ size_t offset, size_t size,
+ enum dma_data_direction dir)
+{
+ return dma_map_page(dev, skb_frag_page(frag),
+ frag->page_offset + offset, size, dir);
+}
+#endif /* skb_frag_dma_map */
+
+#ifndef __skb_frag_unref
+#define __skb_frag_unref(frag) __kc_skb_frag_unref(frag)
+static inline void __kc_skb_frag_unref(skb_frag_t *frag)
+{
+ put_page(skb_frag_page(frag));
+}
+#endif /* __skb_frag_unref */
+#else /* < 3.2.0 */
+#ifndef HAVE_PCI_DEV_FLAGS_ASSIGNED
+#define HAVE_PCI_DEV_FLAGS_ASSIGNED
+#define HAVE_VF_SPOOFCHK_CONFIGURE
+#endif
+#endif /* < 3.2.0 */
+
+#if (RHEL_RELEASE_CODE && \
+ (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,2)) && \
+ (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(7,0)))
+#undef ixgbe_get_netdev_tc_txq
+#define ixgbe_get_netdev_tc_txq(dev, tc) (&netdev_extended(dev)->qos_data.tc_to_txq[tc])
+#endif
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,3,0) )
+typedef u32 kni_netdev_features_t;
+#else /* ! < 3.3.0 */
+typedef netdev_features_t kni_netdev_features_t;
+#define HAVE_INT_NDO_VLAN_RX_ADD_VID
+#ifdef ETHTOOL_SRXNTUPLE
+#undef ETHTOOL_SRXNTUPLE
+#endif
+#endif /* < 3.3.0 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0) )
+#ifndef NETIF_F_RXFCS
+#define NETIF_F_RXFCS 0
+#endif /* NETIF_F_RXFCS */
+#ifndef NETIF_F_RXALL
+#define NETIF_F_RXALL 0
+#endif /* NETIF_F_RXALL */
+
+#define NUMTCS_RETURNS_U8
+
+
+#endif /* < 3.4.0 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0) )
+static inline bool __kc_ether_addr_equal(const u8 *addr1, const u8 *addr2)
+{
+ return !compare_ether_addr(addr1, addr2);
+}
+#define ether_addr_equal(_addr1, _addr2) __kc_ether_addr_equal((_addr1),(_addr2))
+#else
+#define HAVE_FDB_OPS
+#endif /* < 3.5.0 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0) )
+#define NETIF_F_HW_VLAN_TX NETIF_F_HW_VLAN_CTAG_TX
+#define NETIF_F_HW_VLAN_RX NETIF_F_HW_VLAN_CTAG_RX
+#define NETIF_F_HW_VLAN_FILTER NETIF_F_HW_VLAN_CTAG_FILTER
+#endif /* >= 3.10.0 */
+
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) )
+#ifdef CONFIG_PCI_IOV
+extern int __kc_pci_vfs_assigned(struct pci_dev *dev);
+#else
+static inline int __kc_pci_vfs_assigned(struct pci_dev *dev)
+{
+ return 0;
+}
+#endif
+#define pci_vfs_assigned(dev) __kc_pci_vfs_assigned(dev)
+
+#endif
+
+#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0) )
+#define SET_ETHTOOL_OPS(netdev, ops) ((netdev)->ethtool_ops = (ops))
+#endif /* >= 3.16.0 */
+
+#endif /* _KCOMPAT_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/kni_dev.h b/lib/librte_eal/linuxapp/kni/kni_dev.h
new file mode 100644
index 00000000..a0e5cb6b
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/kni_dev.h
@@ -0,0 +1,149 @@
+/*-
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ * The full GNU General Public License is included in this distribution
+ * in the file called LICENSE.GPL.
+ *
+ * Contact Information:
+ * Intel Corporation
+ */
+
+#ifndef _KNI_DEV_H_
+#define _KNI_DEV_H_
+
+#include <linux/if.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/netdevice.h>
+#include <linux/spinlock.h>
+#include <linux/list.h>
+
+#ifdef RTE_KNI_VHOST
+#include <net/sock.h>
+#endif
+
+#include <exec-env/rte_kni_common.h>
+#define KNI_KTHREAD_RESCHEDULE_INTERVAL 5 /* us */
+
+/**
+ * A structure describing the private information for a kni device.
+ */
+
+struct kni_dev {
+ /* kni list */
+ struct list_head list;
+
+ struct net_device_stats stats;
+ int status;
+ uint16_t group_id; /* Group ID of a group of KNI devices */
+ unsigned core_id; /* Core ID to bind */
+ char name[RTE_KNI_NAMESIZE]; /* Network device name */
+ struct task_struct *pthread;
+
+ /* wait queue for req/resp */
+ wait_queue_head_t wq;
+ struct mutex sync_lock;
+
+ /* PCI device id */
+ uint16_t device_id;
+
+ /* kni device */
+ struct net_device *net_dev;
+ struct net_device *lad_dev;
+ struct pci_dev *pci_dev;
+
+ /* queue for packets to be sent out */
+ void *tx_q;
+
+ /* queue for the packets received */
+ void *rx_q;
+
+ /* queue for the allocated mbufs those can be used to save sk buffs */
+ void *alloc_q;
+
+ /* free queue for the mbufs to be freed */
+ void *free_q;
+
+ /* request queue */
+ void *req_q;
+
+ /* response queue */
+ void *resp_q;
+
+ void * sync_kva;
+ void *sync_va;
+
+ void *mbuf_kva;
+ void *mbuf_va;
+
+ /* mbuf size */
+ unsigned mbuf_size;
+
+ /* synchro for request processing */
+ unsigned long synchro;
+
+#ifdef RTE_KNI_VHOST
+ struct kni_vhost_queue* vhost_queue;
+ volatile enum {
+ BE_STOP = 0x1,
+ BE_START = 0x2,
+ BE_FINISH = 0x4,
+ }vq_status;
+#endif
+};
+
+#define KNI_ERR(args...) printk(KERN_DEBUG "KNI: Error: " args)
+#define KNI_PRINT(args...) printk(KERN_DEBUG "KNI: " args)
+#ifdef RTE_KNI_KO_DEBUG
+ #define KNI_DBG(args...) printk(KERN_DEBUG "KNI: " args)
+#else
+ #define KNI_DBG(args...)
+#endif
+
+#ifdef RTE_KNI_VHOST
+unsigned int
+kni_poll(struct file *file, struct socket *sock, poll_table * wait);
+int kni_chk_vhost_rx(struct kni_dev *kni);
+int kni_vhost_init(struct kni_dev *kni);
+int kni_vhost_backend_release(struct kni_dev *kni);
+
+struct kni_vhost_queue {
+ struct sock sk;
+ struct socket *sock;
+ int vnet_hdr_sz;
+ struct kni_dev *kni;
+ int sockfd;
+ unsigned int flags;
+ struct sk_buff* cache;
+ struct rte_kni_fifo* fifo;
+};
+
+#endif
+
+#ifdef RTE_KNI_VHOST_DEBUG_RX
+ #define KNI_DBG_RX(args...) printk(KERN_DEBUG "KNI RX: " args)
+#else
+ #define KNI_DBG_RX(args...)
+#endif
+
+#ifdef RTE_KNI_VHOST_DEBUG_TX
+ #define KNI_DBG_TX(args...) printk(KERN_DEBUG "KNI TX: " args)
+#else
+ #define KNI_DBG_TX(args...)
+#endif
+
+#endif
diff --git a/lib/librte_eal/linuxapp/kni/kni_ethtool.c b/lib/librte_eal/linuxapp/kni/kni_ethtool.c
new file mode 100644
index 00000000..06b6d463
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/kni_ethtool.c
@@ -0,0 +1,217 @@
+/*-
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ * The full GNU General Public License is included in this distribution
+ * in the file called LICENSE.GPL.
+ *
+ * Contact Information:
+ * Intel Corporation
+ */
+
+#include <linux/device.h>
+#include <linux/netdevice.h>
+#include <linux/ethtool.h>
+#include "kni_dev.h"
+
+static int
+kni_check_if_running(struct net_device *dev)
+{
+ struct kni_dev *priv = netdev_priv(dev);
+ if (priv->lad_dev)
+ return 0;
+ else
+ return -EOPNOTSUPP;
+}
+
+static void
+kni_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
+{
+ struct kni_dev *priv = netdev_priv(dev);
+ priv->lad_dev->ethtool_ops->get_drvinfo(priv->lad_dev, info);
+}
+
+static int
+kni_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
+{
+ struct kni_dev *priv = netdev_priv(dev);
+ return priv->lad_dev->ethtool_ops->get_settings(priv->lad_dev, ecmd);
+}
+
+static int
+kni_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
+{
+ struct kni_dev *priv = netdev_priv(dev);
+ return priv->lad_dev->ethtool_ops->set_settings(priv->lad_dev, ecmd);
+}
+
+static void
+kni_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
+{
+ struct kni_dev *priv = netdev_priv(dev);
+ priv->lad_dev->ethtool_ops->get_wol(priv->lad_dev, wol);
+}
+
+static int
+kni_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
+{
+ struct kni_dev *priv = netdev_priv(dev);
+ return priv->lad_dev->ethtool_ops->set_wol(priv->lad_dev, wol);
+}
+
+static int
+kni_nway_reset(struct net_device *dev)
+{
+ struct kni_dev *priv = netdev_priv(dev);
+ return priv->lad_dev->ethtool_ops->nway_reset(priv->lad_dev);
+}
+
+static int
+kni_get_eeprom_len(struct net_device *dev)
+{
+ struct kni_dev *priv = netdev_priv(dev);
+ return priv->lad_dev->ethtool_ops->get_eeprom_len(priv->lad_dev);
+}
+
+static int
+kni_get_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom,
+ u8 *bytes)
+{
+ struct kni_dev *priv = netdev_priv(dev);
+ return priv->lad_dev->ethtool_ops->get_eeprom(priv->lad_dev, eeprom,
+ bytes);
+}
+
+static int
+kni_set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom,
+ u8 *bytes)
+{
+ struct kni_dev *priv = netdev_priv(dev);
+ return priv->lad_dev->ethtool_ops->set_eeprom(priv->lad_dev, eeprom,
+ bytes);
+}
+
+static void
+kni_get_ringparam(struct net_device *dev, struct ethtool_ringparam *ring)
+{
+ struct kni_dev *priv = netdev_priv(dev);
+ priv->lad_dev->ethtool_ops->get_ringparam(priv->lad_dev, ring);
+}
+
+static int
+kni_set_ringparam(struct net_device *dev, struct ethtool_ringparam *ring)
+{
+ struct kni_dev *priv = netdev_priv(dev);
+ return priv->lad_dev->ethtool_ops->set_ringparam(priv->lad_dev, ring);
+}
+
+static void
+kni_get_pauseparam(struct net_device *dev, struct ethtool_pauseparam *pause)
+{
+ struct kni_dev *priv = netdev_priv(dev);
+ priv->lad_dev->ethtool_ops->get_pauseparam(priv->lad_dev, pause);
+}
+
+static int
+kni_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam *pause)
+{
+ struct kni_dev *priv = netdev_priv(dev);
+ return priv->lad_dev->ethtool_ops->set_pauseparam(priv->lad_dev,
+ pause);
+}
+
+static u32
+kni_get_msglevel(struct net_device *dev)
+{
+ struct kni_dev *priv = netdev_priv(dev);
+ return priv->lad_dev->ethtool_ops->get_msglevel(priv->lad_dev);
+}
+
+static void
+kni_set_msglevel(struct net_device *dev, u32 data)
+{
+ struct kni_dev *priv = netdev_priv(dev);
+ priv->lad_dev->ethtool_ops->set_msglevel(priv->lad_dev, data);
+}
+
+static int
+kni_get_regs_len(struct net_device *dev)
+{
+ struct kni_dev *priv = netdev_priv(dev);
+ return priv->lad_dev->ethtool_ops->get_regs_len(priv->lad_dev);
+}
+
+static void
+kni_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *p)
+{
+ struct kni_dev *priv = netdev_priv(dev);
+ priv->lad_dev->ethtool_ops->get_regs(priv->lad_dev, regs, p);
+}
+
+static void
+kni_get_strings(struct net_device *dev, u32 stringset, u8 *data)
+{
+ struct kni_dev *priv = netdev_priv(dev);
+ priv->lad_dev->ethtool_ops->get_strings(priv->lad_dev, stringset,
+ data);
+}
+
+static int
+kni_get_sset_count(struct net_device *dev, int sset)
+{
+ struct kni_dev *priv = netdev_priv(dev);
+ return priv->lad_dev->ethtool_ops->get_sset_count(priv->lad_dev, sset);
+}
+
+static void
+kni_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *stats,
+ u64 *data)
+{
+ struct kni_dev *priv = netdev_priv(dev);
+ priv->lad_dev->ethtool_ops->get_ethtool_stats(priv->lad_dev, stats,
+ data);
+}
+
+struct ethtool_ops kni_ethtool_ops = {
+ .begin = kni_check_if_running,
+ .get_drvinfo = kni_get_drvinfo,
+ .get_settings = kni_get_settings,
+ .set_settings = kni_set_settings,
+ .get_regs_len = kni_get_regs_len,
+ .get_regs = kni_get_regs,
+ .get_wol = kni_get_wol,
+ .set_wol = kni_set_wol,
+ .nway_reset = kni_nway_reset,
+ .get_link = ethtool_op_get_link,
+ .get_eeprom_len = kni_get_eeprom_len,
+ .get_eeprom = kni_get_eeprom,
+ .set_eeprom = kni_set_eeprom,
+ .get_ringparam = kni_get_ringparam,
+ .set_ringparam = kni_set_ringparam,
+ .get_pauseparam = kni_get_pauseparam,
+ .set_pauseparam = kni_set_pauseparam,
+ .get_msglevel = kni_get_msglevel,
+ .set_msglevel = kni_set_msglevel,
+ .get_strings = kni_get_strings,
+ .get_sset_count = kni_get_sset_count,
+ .get_ethtool_stats = kni_get_ethtool_stats,
+};
+
+void
+kni_set_ethtool_ops(struct net_device *netdev)
+{
+ netdev->ethtool_ops = &kni_ethtool_ops;
+}
diff --git a/lib/librte_eal/linuxapp/kni/kni_fifo.h b/lib/librte_eal/linuxapp/kni/kni_fifo.h
new file mode 100644
index 00000000..3ea750e2
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/kni_fifo.h
@@ -0,0 +1,108 @@
+/*-
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ * The full GNU General Public License is included in this distribution
+ * in the file called LICENSE.GPL.
+ *
+ * Contact Information:
+ * Intel Corporation
+ */
+
+#ifndef _KNI_FIFO_H_
+#define _KNI_FIFO_H_
+
+#include <exec-env/rte_kni_common.h>
+
+/**
+ * Adds num elements into the fifo. Return the number actually written
+ */
+static inline unsigned
+kni_fifo_put(struct rte_kni_fifo *fifo, void **data, unsigned num)
+{
+ unsigned i = 0;
+ unsigned fifo_write = fifo->write;
+ unsigned fifo_read = fifo->read;
+ unsigned new_write = fifo_write;
+
+ for (i = 0; i < num; i++) {
+ new_write = (new_write + 1) & (fifo->len - 1);
+
+ if (new_write == fifo_read)
+ break;
+ fifo->buffer[fifo_write] = data[i];
+ fifo_write = new_write;
+ }
+ fifo->write = fifo_write;
+
+ return i;
+}
+
+/**
+ * Get up to num elements from the fifo. Return the number actully read
+ */
+static inline unsigned
+kni_fifo_get(struct rte_kni_fifo *fifo, void **data, unsigned num)
+{
+ unsigned i = 0;
+ unsigned new_read = fifo->read;
+ unsigned fifo_write = fifo->write;
+
+ for (i = 0; i < num; i++) {
+ if (new_read == fifo_write)
+ break;
+
+ data[i] = fifo->buffer[new_read];
+ new_read = (new_read + 1) & (fifo->len - 1);
+ }
+ fifo->read = new_read;
+
+ return i;
+}
+
+/**
+ * Get the num of elements in the fifo
+ */
+static inline unsigned
+kni_fifo_count(struct rte_kni_fifo *fifo)
+{
+ return (fifo->len + fifo->write - fifo->read) & ( fifo->len - 1);
+}
+
+/**
+ * Get the num of available elements in the fifo
+ */
+static inline unsigned
+kni_fifo_free_count(struct rte_kni_fifo *fifo)
+{
+ return (fifo->read - fifo->write - 1) & (fifo->len - 1);
+}
+
+#ifdef RTE_KNI_VHOST
+/**
+ * Initializes the kni fifo structure
+ */
+static inline void
+kni_fifo_init(struct rte_kni_fifo *fifo, unsigned size)
+{
+ fifo->write = 0;
+ fifo->read = 0;
+ fifo->len = size;
+ fifo->elem_size = sizeof(void *);
+}
+#endif
+
+#endif /* _KNI_FIFO_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/kni_misc.c b/lib/librte_eal/linuxapp/kni/kni_misc.c
new file mode 100644
index 00000000..ae8133f3
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/kni_misc.c
@@ -0,0 +1,688 @@
+/*-
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ * The full GNU General Public License is included in this distribution
+ * in the file called LICENSE.GPL.
+ *
+ * Contact Information:
+ * Intel Corporation
+ */
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/miscdevice.h>
+#include <linux/netdevice.h>
+#include <linux/pci.h>
+#include <linux/kthread.h>
+#include <linux/rwsem.h>
+#include <linux/nsproxy.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+
+#include <exec-env/rte_kni_common.h>
+#include "kni_dev.h"
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Kernel Module for managing kni devices");
+
+#define KNI_RX_LOOP_NUM 1000
+
+#define KNI_MAX_DEVICES 32
+
+extern void kni_net_rx(struct kni_dev *kni);
+extern void kni_net_init(struct net_device *dev);
+extern void kni_net_config_lo_mode(char *lo_str);
+extern void kni_net_poll_resp(struct kni_dev *kni);
+extern void kni_set_ethtool_ops(struct net_device *netdev);
+
+extern int ixgbe_kni_probe(struct pci_dev *pdev, struct net_device **lad_dev);
+extern void ixgbe_kni_remove(struct pci_dev *pdev);
+extern int igb_kni_probe(struct pci_dev *pdev, struct net_device **lad_dev);
+extern void igb_kni_remove(struct pci_dev *pdev);
+
+static int kni_open(struct inode *inode, struct file *file);
+static int kni_release(struct inode *inode, struct file *file);
+static int kni_ioctl(struct inode *inode, unsigned int ioctl_num,
+ unsigned long ioctl_param);
+static int kni_compat_ioctl(struct inode *inode, unsigned int ioctl_num,
+ unsigned long ioctl_param);
+static int kni_dev_remove(struct kni_dev *dev);
+
+static int __init kni_parse_kthread_mode(void);
+
+/* KNI processing for single kernel thread mode */
+static int kni_thread_single(void *unused);
+/* KNI processing for multiple kernel thread mode */
+static int kni_thread_multiple(void *param);
+
+static struct file_operations kni_fops = {
+ .owner = THIS_MODULE,
+ .open = kni_open,
+ .release = kni_release,
+ .unlocked_ioctl = (void *)kni_ioctl,
+ .compat_ioctl = (void *)kni_compat_ioctl,
+};
+
+static struct miscdevice kni_misc = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = KNI_DEVICE,
+ .fops = &kni_fops,
+};
+
+/* loopback mode */
+static char *lo_mode = NULL;
+
+/* Kernel thread mode */
+static char *kthread_mode = NULL;
+static unsigned multiple_kthread_on = 0;
+
+#define KNI_DEV_IN_USE_BIT_NUM 0 /* Bit number for device in use */
+
+static int kni_net_id;
+
+struct kni_net {
+ unsigned long device_in_use; /* device in use flag */
+ struct task_struct *kni_kthread;
+ struct rw_semaphore kni_list_lock;
+ struct list_head kni_list_head;
+};
+
+static int __net_init kni_init_net(struct net *net)
+{
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32)
+ struct kni_net *knet = net_generic(net, kni_net_id);
+#else
+ struct kni_net *knet;
+ int ret;
+
+ knet = kmalloc(sizeof(struct kni_net), GFP_KERNEL);
+ if (!knet) {
+ ret = -ENOMEM;
+ return ret;
+ }
+#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) */
+
+ /* Clear the bit of device in use */
+ clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use);
+
+ init_rwsem(&knet->kni_list_lock);
+ INIT_LIST_HEAD(&knet->kni_list_head);
+
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32)
+ return 0;
+#else
+ ret = net_assign_generic(net, kni_net_id, knet);
+ if (ret < 0)
+ kfree(knet);
+
+ return ret;
+#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) */
+}
+
+static void __net_exit kni_exit_net(struct net *net)
+{
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 32)
+ struct kni_net *knet = net_generic(net, kni_net_id);
+
+ kfree(knet);
+#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) */
+}
+
+static struct pernet_operations kni_net_ops = {
+ .init = kni_init_net,
+ .exit = kni_exit_net,
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32)
+ .id = &kni_net_id,
+ .size = sizeof(struct kni_net),
+#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) */
+};
+
+static int __init
+kni_init(void)
+{
+ int rc;
+
+ KNI_PRINT("######## DPDK kni module loading ########\n");
+
+ if (kni_parse_kthread_mode() < 0) {
+ KNI_ERR("Invalid parameter for kthread_mode\n");
+ return -EINVAL;
+ }
+
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32)
+ rc = register_pernet_subsys(&kni_net_ops);
+#else
+ rc = register_pernet_gen_subsys(&kni_net_id, &kni_net_ops);
+#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) */
+ if (rc)
+ return -EPERM;
+
+ rc = misc_register(&kni_misc);
+ if (rc != 0) {
+ KNI_ERR("Misc registration failed\n");
+ goto out;
+ }
+
+ /* Configure the lo mode according to the input parameter */
+ kni_net_config_lo_mode(lo_mode);
+
+ KNI_PRINT("######## DPDK kni module loaded ########\n");
+
+ return 0;
+
+out:
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32)
+ unregister_pernet_subsys(&kni_net_ops);
+#else
+ register_pernet_gen_subsys(&kni_net_id, &kni_net_ops);
+#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) */
+ return rc;
+}
+
+static void __exit
+kni_exit(void)
+{
+ misc_deregister(&kni_misc);
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32)
+ unregister_pernet_subsys(&kni_net_ops);
+#else
+ register_pernet_gen_subsys(&kni_net_id, &kni_net_ops);
+#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) */
+ KNI_PRINT("####### DPDK kni module unloaded #######\n");
+}
+
+static int __init
+kni_parse_kthread_mode(void)
+{
+ if (!kthread_mode)
+ return 0;
+
+ if (strcmp(kthread_mode, "single") == 0)
+ return 0;
+ else if (strcmp(kthread_mode, "multiple") == 0)
+ multiple_kthread_on = 1;
+ else
+ return -1;
+
+ return 0;
+}
+
+static int
+kni_open(struct inode *inode, struct file *file)
+{
+ struct net *net = current->nsproxy->net_ns;
+ struct kni_net *knet = net_generic(net, kni_net_id);
+
+ /* kni device can be opened by one user only per netns */
+ if (test_and_set_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use))
+ return -EBUSY;
+
+ /* Create kernel thread for single mode */
+ if (multiple_kthread_on == 0) {
+ KNI_PRINT("Single kernel thread for all KNI devices\n");
+ /* Create kernel thread for RX */
+ knet->kni_kthread = kthread_run(kni_thread_single, (void *)knet,
+ "kni_single");
+ if (IS_ERR(knet->kni_kthread)) {
+ KNI_ERR("Unable to create kernel threaed\n");
+ return PTR_ERR(knet->kni_kthread);
+ }
+ } else
+ KNI_PRINT("Multiple kernel thread mode enabled\n");
+
+ file->private_data = get_net(net);
+ KNI_PRINT("/dev/kni opened\n");
+
+ return 0;
+}
+
+static int
+kni_release(struct inode *inode, struct file *file)
+{
+ struct net *net = file->private_data;
+ struct kni_net *knet = net_generic(net, kni_net_id);
+ struct kni_dev *dev, *n;
+
+ /* Stop kernel thread for single mode */
+ if (multiple_kthread_on == 0) {
+ /* Stop kernel thread */
+ kthread_stop(knet->kni_kthread);
+ knet->kni_kthread = NULL;
+ }
+
+ down_write(&knet->kni_list_lock);
+ list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) {
+ /* Stop kernel thread for multiple mode */
+ if (multiple_kthread_on && dev->pthread != NULL) {
+ kthread_stop(dev->pthread);
+ dev->pthread = NULL;
+ }
+
+#ifdef RTE_KNI_VHOST
+ kni_vhost_backend_release(dev);
+#endif
+ kni_dev_remove(dev);
+ list_del(&dev->list);
+ }
+ up_write(&knet->kni_list_lock);
+
+ /* Clear the bit of device in use */
+ clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use);
+
+ put_net(net);
+ KNI_PRINT("/dev/kni closed\n");
+
+ return 0;
+}
+
+static int
+kni_thread_single(void *data)
+{
+ struct kni_net *knet = data;
+ int j;
+ struct kni_dev *dev;
+
+ while (!kthread_should_stop()) {
+ down_read(&knet->kni_list_lock);
+ for (j = 0; j < KNI_RX_LOOP_NUM; j++) {
+ list_for_each_entry(dev, &knet->kni_list_head, list) {
+#ifdef RTE_KNI_VHOST
+ kni_chk_vhost_rx(dev);
+#else
+ kni_net_rx(dev);
+#endif
+ kni_net_poll_resp(dev);
+ }
+ }
+ up_read(&knet->kni_list_lock);
+#ifdef RTE_KNI_PREEMPT_DEFAULT
+ /* reschedule out for a while */
+ schedule_timeout_interruptible(usecs_to_jiffies( \
+ KNI_KTHREAD_RESCHEDULE_INTERVAL));
+#endif
+ }
+
+ return 0;
+}
+
+static int
+kni_thread_multiple(void *param)
+{
+ int j;
+ struct kni_dev *dev = (struct kni_dev *)param;
+
+ while (!kthread_should_stop()) {
+ for (j = 0; j < KNI_RX_LOOP_NUM; j++) {
+#ifdef RTE_KNI_VHOST
+ kni_chk_vhost_rx(dev);
+#else
+ kni_net_rx(dev);
+#endif
+ kni_net_poll_resp(dev);
+ }
+#ifdef RTE_KNI_PREEMPT_DEFAULT
+ schedule_timeout_interruptible(usecs_to_jiffies( \
+ KNI_KTHREAD_RESCHEDULE_INTERVAL));
+#endif
+ }
+
+ return 0;
+}
+
+static int
+kni_dev_remove(struct kni_dev *dev)
+{
+ if (!dev)
+ return -ENODEV;
+
+ switch (dev->device_id) {
+ #define RTE_PCI_DEV_ID_DECL_IGB(vend, dev) case (dev):
+ #include <rte_pci_dev_ids.h>
+ igb_kni_remove(dev->pci_dev);
+ break;
+ #define RTE_PCI_DEV_ID_DECL_IXGBE(vend, dev) case (dev):
+ #include <rte_pci_dev_ids.h>
+ ixgbe_kni_remove(dev->pci_dev);
+ break;
+ default:
+ break;
+ }
+
+ if (dev->net_dev) {
+ unregister_netdev(dev->net_dev);
+ free_netdev(dev->net_dev);
+ }
+
+ return 0;
+}
+
+static int
+kni_check_param(struct kni_dev *kni, struct rte_kni_device_info *dev)
+{
+ if (!kni || !dev)
+ return -1;
+
+ /* Check if network name has been used */
+ if (!strncmp(kni->name, dev->name, RTE_KNI_NAMESIZE)) {
+ KNI_ERR("KNI name %s duplicated\n", dev->name);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int
+kni_ioctl_create(struct net *net,
+ unsigned int ioctl_num, unsigned long ioctl_param)
+{
+ struct kni_net *knet = net_generic(net, kni_net_id);
+ int ret;
+ struct rte_kni_device_info dev_info;
+ struct pci_dev *pci = NULL;
+ struct pci_dev *found_pci = NULL;
+ struct net_device *net_dev = NULL;
+ struct net_device *lad_dev = NULL;
+ struct kni_dev *kni, *dev, *n;
+
+ printk(KERN_INFO "KNI: Creating kni...\n");
+ /* Check the buffer size, to avoid warning */
+ if (_IOC_SIZE(ioctl_num) > sizeof(dev_info))
+ return -EINVAL;
+
+ /* Copy kni info from user space */
+ ret = copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info));
+ if (ret) {
+ KNI_ERR("copy_from_user in kni_ioctl_create");
+ return -EIO;
+ }
+
+ /**
+ * Check if the cpu core id is valid for binding,
+ * for multiple kernel thread mode.
+ */
+ if (multiple_kthread_on && dev_info.force_bind &&
+ !cpu_online(dev_info.core_id)) {
+ KNI_ERR("cpu %u is not online\n", dev_info.core_id);
+ return -EINVAL;
+ }
+
+ /* Check if it has been created */
+ down_read(&knet->kni_list_lock);
+ list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) {
+ if (kni_check_param(dev, &dev_info) < 0) {
+ up_read(&knet->kni_list_lock);
+ return -EINVAL;
+ }
+ }
+ up_read(&knet->kni_list_lock);
+
+ net_dev = alloc_netdev(sizeof(struct kni_dev), dev_info.name,
+#ifdef NET_NAME_UNKNOWN
+ NET_NAME_UNKNOWN,
+#endif
+ kni_net_init);
+ if (net_dev == NULL) {
+ KNI_ERR("error allocating device \"%s\"\n", dev_info.name);
+ return -EBUSY;
+ }
+
+ dev_net_set(net_dev, net);
+
+ kni = netdev_priv(net_dev);
+
+ kni->net_dev = net_dev;
+ kni->group_id = dev_info.group_id;
+ kni->core_id = dev_info.core_id;
+ strncpy(kni->name, dev_info.name, RTE_KNI_NAMESIZE);
+
+ /* Translate user space info into kernel space info */
+ kni->tx_q = phys_to_virt(dev_info.tx_phys);
+ kni->rx_q = phys_to_virt(dev_info.rx_phys);
+ kni->alloc_q = phys_to_virt(dev_info.alloc_phys);
+ kni->free_q = phys_to_virt(dev_info.free_phys);
+
+ kni->req_q = phys_to_virt(dev_info.req_phys);
+ kni->resp_q = phys_to_virt(dev_info.resp_phys);
+ kni->sync_va = dev_info.sync_va;
+ kni->sync_kva = phys_to_virt(dev_info.sync_phys);
+
+ kni->mbuf_kva = phys_to_virt(dev_info.mbuf_phys);
+ kni->mbuf_va = dev_info.mbuf_va;
+
+#ifdef RTE_KNI_VHOST
+ kni->vhost_queue = NULL;
+ kni->vq_status = BE_STOP;
+#endif
+ kni->mbuf_size = dev_info.mbuf_size;
+
+ KNI_PRINT("tx_phys: 0x%016llx, tx_q addr: 0x%p\n",
+ (unsigned long long) dev_info.tx_phys, kni->tx_q);
+ KNI_PRINT("rx_phys: 0x%016llx, rx_q addr: 0x%p\n",
+ (unsigned long long) dev_info.rx_phys, kni->rx_q);
+ KNI_PRINT("alloc_phys: 0x%016llx, alloc_q addr: 0x%p\n",
+ (unsigned long long) dev_info.alloc_phys, kni->alloc_q);
+ KNI_PRINT("free_phys: 0x%016llx, free_q addr: 0x%p\n",
+ (unsigned long long) dev_info.free_phys, kni->free_q);
+ KNI_PRINT("req_phys: 0x%016llx, req_q addr: 0x%p\n",
+ (unsigned long long) dev_info.req_phys, kni->req_q);
+ KNI_PRINT("resp_phys: 0x%016llx, resp_q addr: 0x%p\n",
+ (unsigned long long) dev_info.resp_phys, kni->resp_q);
+ KNI_PRINT("mbuf_phys: 0x%016llx, mbuf_kva: 0x%p\n",
+ (unsigned long long) dev_info.mbuf_phys, kni->mbuf_kva);
+ KNI_PRINT("mbuf_va: 0x%p\n", dev_info.mbuf_va);
+ KNI_PRINT("mbuf_size: %u\n", kni->mbuf_size);
+
+ KNI_DBG("PCI: %02x:%02x.%02x %04x:%04x\n",
+ dev_info.bus,
+ dev_info.devid,
+ dev_info.function,
+ dev_info.vendor_id,
+ dev_info.device_id);
+
+ pci = pci_get_device(dev_info.vendor_id, dev_info.device_id, NULL);
+
+ /* Support Ethtool */
+ while (pci) {
+ KNI_PRINT("pci_bus: %02x:%02x:%02x \n",
+ pci->bus->number,
+ PCI_SLOT(pci->devfn),
+ PCI_FUNC(pci->devfn));
+
+ if ((pci->bus->number == dev_info.bus) &&
+ (PCI_SLOT(pci->devfn) == dev_info.devid) &&
+ (PCI_FUNC(pci->devfn) == dev_info.function)) {
+ found_pci = pci;
+ switch (dev_info.device_id) {
+ #define RTE_PCI_DEV_ID_DECL_IGB(vend, dev) case (dev):
+ #include <rte_pci_dev_ids.h>
+ ret = igb_kni_probe(found_pci, &lad_dev);
+ break;
+ #define RTE_PCI_DEV_ID_DECL_IXGBE(vend, dev) \
+ case (dev):
+ #include <rte_pci_dev_ids.h>
+ ret = ixgbe_kni_probe(found_pci, &lad_dev);
+ break;
+ default:
+ ret = -1;
+ break;
+ }
+
+ KNI_DBG("PCI found: pci=0x%p, lad_dev=0x%p\n",
+ pci, lad_dev);
+ if (ret == 0) {
+ kni->lad_dev = lad_dev;
+ kni_set_ethtool_ops(kni->net_dev);
+ } else {
+ KNI_ERR("Device not supported by ethtool");
+ kni->lad_dev = NULL;
+ }
+
+ kni->pci_dev = found_pci;
+ kni->device_id = dev_info.device_id;
+ break;
+ }
+ pci = pci_get_device(dev_info.vendor_id,
+ dev_info.device_id, pci);
+ }
+ if (pci)
+ pci_dev_put(pci);
+
+ ret = register_netdev(net_dev);
+ if (ret) {
+ KNI_ERR("error %i registering device \"%s\"\n",
+ ret, dev_info.name);
+ kni_dev_remove(kni);
+ return -ENODEV;
+ }
+
+#ifdef RTE_KNI_VHOST
+ kni_vhost_init(kni);
+#endif
+
+ /**
+ * Create a new kernel thread for multiple mode, set its core affinity,
+ * and finally wake it up.
+ */
+ if (multiple_kthread_on) {
+ kni->pthread = kthread_create(kni_thread_multiple,
+ (void *)kni,
+ "kni_%s", kni->name);
+ if (IS_ERR(kni->pthread)) {
+ kni_dev_remove(kni);
+ return -ECANCELED;
+ }
+ if (dev_info.force_bind)
+ kthread_bind(kni->pthread, kni->core_id);
+ wake_up_process(kni->pthread);
+ }
+
+ down_write(&knet->kni_list_lock);
+ list_add(&kni->list, &knet->kni_list_head);
+ up_write(&knet->kni_list_lock);
+
+ return 0;
+}
+
+static int
+kni_ioctl_release(struct net *net,
+ unsigned int ioctl_num, unsigned long ioctl_param)
+{
+ struct kni_net *knet = net_generic(net, kni_net_id);
+ int ret = -EINVAL;
+ struct kni_dev *dev, *n;
+ struct rte_kni_device_info dev_info;
+
+ if (_IOC_SIZE(ioctl_num) > sizeof(dev_info))
+ return -EINVAL;
+
+ ret = copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info));
+ if (ret) {
+ KNI_ERR("copy_from_user in kni_ioctl_release");
+ return -EIO;
+ }
+
+ /* Release the network device according to its name */
+ if (strlen(dev_info.name) == 0)
+ return ret;
+
+ down_write(&knet->kni_list_lock);
+ list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) {
+ if (strncmp(dev->name, dev_info.name, RTE_KNI_NAMESIZE) != 0)
+ continue;
+
+ if (multiple_kthread_on && dev->pthread != NULL) {
+ kthread_stop(dev->pthread);
+ dev->pthread = NULL;
+ }
+
+#ifdef RTE_KNI_VHOST
+ kni_vhost_backend_release(dev);
+#endif
+ kni_dev_remove(dev);
+ list_del(&dev->list);
+ ret = 0;
+ break;
+ }
+ up_write(&knet->kni_list_lock);
+ printk(KERN_INFO "KNI: %s release kni named %s\n",
+ (ret == 0 ? "Successfully" : "Unsuccessfully"), dev_info.name);
+
+ return ret;
+}
+
+static int
+kni_ioctl(struct inode *inode,
+ unsigned int ioctl_num,
+ unsigned long ioctl_param)
+{
+ int ret = -EINVAL;
+ struct net *net = current->nsproxy->net_ns;
+
+ KNI_DBG("IOCTL num=0x%0x param=0x%0lx\n", ioctl_num, ioctl_param);
+
+ /*
+ * Switch according to the ioctl called
+ */
+ switch (_IOC_NR(ioctl_num)) {
+ case _IOC_NR(RTE_KNI_IOCTL_TEST):
+ /* For test only, not used */
+ break;
+ case _IOC_NR(RTE_KNI_IOCTL_CREATE):
+ ret = kni_ioctl_create(net, ioctl_num, ioctl_param);
+ break;
+ case _IOC_NR(RTE_KNI_IOCTL_RELEASE):
+ ret = kni_ioctl_release(net, ioctl_num, ioctl_param);
+ break;
+ default:
+ KNI_DBG("IOCTL default\n");
+ break;
+ }
+
+ return ret;
+}
+
+static int
+kni_compat_ioctl(struct inode *inode,
+ unsigned int ioctl_num,
+ unsigned long ioctl_param)
+{
+ /* 32 bits app on 64 bits OS to be supported later */
+ KNI_PRINT("Not implemented.\n");
+
+ return -EINVAL;
+}
+
+module_init(kni_init);
+module_exit(kni_exit);
+
+module_param(lo_mode, charp, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(lo_mode,
+"KNI loopback mode (default=lo_mode_none):\n"
+" lo_mode_none Kernel loopback disabled\n"
+" lo_mode_fifo Enable kernel loopback with fifo\n"
+" lo_mode_fifo_skb Enable kernel loopback with fifo and skb buffer\n"
+"\n"
+);
+
+module_param(kthread_mode, charp, S_IRUGO);
+MODULE_PARM_DESC(kthread_mode,
+"Kernel thread mode (default=single):\n"
+" single Single kernel thread mode enabled.\n"
+" multiple Multiple kernel thread mode enabled.\n"
+"\n"
+);
diff --git a/lib/librte_eal/linuxapp/kni/kni_net.c b/lib/librte_eal/linuxapp/kni/kni_net.c
new file mode 100644
index 00000000..cfa83398
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/kni_net.c
@@ -0,0 +1,706 @@
+/*-
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ * The full GNU General Public License is included in this distribution
+ * in the file called LICENSE.GPL.
+ *
+ * Contact Information:
+ * Intel Corporation
+ */
+
+/*
+ * This code is inspired from the book "Linux Device Drivers" by
+ * Alessandro Rubini and Jonathan Corbet, published by O'Reilly & Associates
+ */
+
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h> /* eth_type_trans */
+#include <linux/skbuff.h>
+#include <linux/kthread.h>
+#include <linux/delay.h>
+
+#include <exec-env/rte_kni_common.h>
+#include <kni_fifo.h>
+#include "kni_dev.h"
+
+#define WD_TIMEOUT 5 /*jiffies */
+
+#define MBUF_BURST_SZ 32
+
+#define KNI_WAIT_RESPONSE_TIMEOUT 300 /* 3 seconds */
+
+/* typedef for rx function */
+typedef void (*kni_net_rx_t)(struct kni_dev *kni);
+
+static int kni_net_tx(struct sk_buff *skb, struct net_device *dev);
+static void kni_net_rx_normal(struct kni_dev *kni);
+static void kni_net_rx_lo_fifo(struct kni_dev *kni);
+static void kni_net_rx_lo_fifo_skb(struct kni_dev *kni);
+static int kni_net_process_request(struct kni_dev *kni,
+ struct rte_kni_request *req);
+
+/* kni rx function pointer, with default to normal rx */
+static kni_net_rx_t kni_net_rx_func = kni_net_rx_normal;
+
+/*
+ * Open and close
+ */
+static int
+kni_net_open(struct net_device *dev)
+{
+ int ret;
+ struct rte_kni_request req;
+ struct kni_dev *kni = netdev_priv(dev);
+
+ if (kni->lad_dev)
+ memcpy(dev->dev_addr, kni->lad_dev->dev_addr, ETH_ALEN);
+ else
+ /*
+ * Generate random mac address. eth_random_addr() is the newer
+ * version of generating mac address in linux kernel.
+ */
+ random_ether_addr(dev->dev_addr);
+
+ netif_start_queue(dev);
+
+ memset(&req, 0, sizeof(req));
+ req.req_id = RTE_KNI_REQ_CFG_NETWORK_IF;
+
+ /* Setting if_up to non-zero means up */
+ req.if_up = 1;
+ ret = kni_net_process_request(kni, &req);
+
+ return (ret == 0) ? req.result : ret;
+}
+
+static int
+kni_net_release(struct net_device *dev)
+{
+ int ret;
+ struct rte_kni_request req;
+ struct kni_dev *kni = netdev_priv(dev);
+
+ netif_stop_queue(dev); /* can't transmit any more */
+
+ memset(&req, 0, sizeof(req));
+ req.req_id = RTE_KNI_REQ_CFG_NETWORK_IF;
+
+ /* Setting if_up to 0 means down */
+ req.if_up = 0;
+ ret = kni_net_process_request(kni, &req);
+
+ return (ret == 0) ? req.result : ret;
+}
+
+/*
+ * Configuration changes (passed on by ifconfig)
+ */
+static int
+kni_net_config(struct net_device *dev, struct ifmap *map)
+{
+ if (dev->flags & IFF_UP) /* can't act on a running interface */
+ return -EBUSY;
+
+ /* ignore other fields */
+ return 0;
+}
+
+/*
+ * RX: normal working mode
+ */
+static void
+kni_net_rx_normal(struct kni_dev *kni)
+{
+ unsigned ret;
+ uint32_t len;
+ unsigned i, num_rx, num_fq;
+ struct rte_kni_mbuf *kva;
+ struct rte_kni_mbuf *va[MBUF_BURST_SZ];
+ void * data_kva;
+
+ struct sk_buff *skb;
+ struct net_device *dev = kni->net_dev;
+
+ /* Get the number of free entries in free_q */
+ num_fq = kni_fifo_free_count(kni->free_q);
+ if (num_fq == 0) {
+ /* No room on the free_q, bail out */
+ return;
+ }
+
+ /* Calculate the number of entries to dequeue from rx_q */
+ num_rx = min(num_fq, (unsigned)MBUF_BURST_SZ);
+
+ /* Burst dequeue from rx_q */
+ num_rx = kni_fifo_get(kni->rx_q, (void **)va, num_rx);
+ if (num_rx == 0)
+ return;
+
+ /* Transfer received packets to netif */
+ for (i = 0; i < num_rx; i++) {
+ kva = (void *)va[i] - kni->mbuf_va + kni->mbuf_kva;
+ len = kva->data_len;
+ data_kva = kva->buf_addr + kva->data_off - kni->mbuf_va
+ + kni->mbuf_kva;
+
+ skb = dev_alloc_skb(len + 2);
+ if (!skb) {
+ KNI_ERR("Out of mem, dropping pkts\n");
+ /* Update statistics */
+ kni->stats.rx_dropped++;
+ }
+ else {
+ /* Align IP on 16B boundary */
+ skb_reserve(skb, 2);
+ memcpy(skb_put(skb, len), data_kva, len);
+ skb->dev = dev;
+ skb->protocol = eth_type_trans(skb, dev);
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+ /* Call netif interface */
+ netif_rx_ni(skb);
+
+ /* Update statistics */
+ kni->stats.rx_bytes += len;
+ kni->stats.rx_packets++;
+ }
+ }
+
+ /* Burst enqueue mbufs into free_q */
+ ret = kni_fifo_put(kni->free_q, (void **)va, num_rx);
+ if (ret != num_rx)
+ /* Failing should not happen */
+ KNI_ERR("Fail to enqueue entries into free_q\n");
+}
+
+/*
+ * RX: loopback with enqueue/dequeue fifos.
+ */
+static void
+kni_net_rx_lo_fifo(struct kni_dev *kni)
+{
+ unsigned ret;
+ uint32_t len;
+ unsigned i, num, num_rq, num_tq, num_aq, num_fq;
+ struct rte_kni_mbuf *kva;
+ struct rte_kni_mbuf *va[MBUF_BURST_SZ];
+ void * data_kva;
+
+ struct rte_kni_mbuf *alloc_kva;
+ struct rte_kni_mbuf *alloc_va[MBUF_BURST_SZ];
+ void *alloc_data_kva;
+
+ /* Get the number of entries in rx_q */
+ num_rq = kni_fifo_count(kni->rx_q);
+
+ /* Get the number of free entrie in tx_q */
+ num_tq = kni_fifo_free_count(kni->tx_q);
+
+ /* Get the number of entries in alloc_q */
+ num_aq = kni_fifo_count(kni->alloc_q);
+
+ /* Get the number of free entries in free_q */
+ num_fq = kni_fifo_free_count(kni->free_q);
+
+ /* Calculate the number of entries to be dequeued from rx_q */
+ num = min(num_rq, num_tq);
+ num = min(num, num_aq);
+ num = min(num, num_fq);
+ num = min(num, (unsigned)MBUF_BURST_SZ);
+
+ /* Return if no entry to dequeue from rx_q */
+ if (num == 0)
+ return;
+
+ /* Burst dequeue from rx_q */
+ ret = kni_fifo_get(kni->rx_q, (void **)va, num);
+ if (ret == 0)
+ return; /* Failing should not happen */
+
+ /* Dequeue entries from alloc_q */
+ ret = kni_fifo_get(kni->alloc_q, (void **)alloc_va, num);
+ if (ret) {
+ num = ret;
+ /* Copy mbufs */
+ for (i = 0; i < num; i++) {
+ kva = (void *)va[i] - kni->mbuf_va + kni->mbuf_kva;
+ len = kva->pkt_len;
+ data_kva = kva->buf_addr + kva->data_off -
+ kni->mbuf_va + kni->mbuf_kva;
+
+ alloc_kva = (void *)alloc_va[i] - kni->mbuf_va +
+ kni->mbuf_kva;
+ alloc_data_kva = alloc_kva->buf_addr +
+ alloc_kva->data_off - kni->mbuf_va +
+ kni->mbuf_kva;
+ memcpy(alloc_data_kva, data_kva, len);
+ alloc_kva->pkt_len = len;
+ alloc_kva->data_len = len;
+
+ kni->stats.tx_bytes += len;
+ kni->stats.rx_bytes += len;
+ }
+
+ /* Burst enqueue mbufs into tx_q */
+ ret = kni_fifo_put(kni->tx_q, (void **)alloc_va, num);
+ if (ret != num)
+ /* Failing should not happen */
+ KNI_ERR("Fail to enqueue mbufs into tx_q\n");
+ }
+
+ /* Burst enqueue mbufs into free_q */
+ ret = kni_fifo_put(kni->free_q, (void **)va, num);
+ if (ret != num)
+ /* Failing should not happen */
+ KNI_ERR("Fail to enqueue mbufs into free_q\n");
+
+ /**
+ * Update statistic, and enqueue/dequeue failure is impossible,
+ * as all queues are checked at first.
+ */
+ kni->stats.tx_packets += num;
+ kni->stats.rx_packets += num;
+}
+
+/*
+ * RX: loopback with enqueue/dequeue fifos and sk buffer copies.
+ */
+static void
+kni_net_rx_lo_fifo_skb(struct kni_dev *kni)
+{
+ unsigned ret;
+ uint32_t len;
+ unsigned i, num_rq, num_fq, num;
+ struct rte_kni_mbuf *kva;
+ struct rte_kni_mbuf *va[MBUF_BURST_SZ];
+ void * data_kva;
+
+ struct sk_buff *skb;
+ struct net_device *dev = kni->net_dev;
+
+ /* Get the number of entries in rx_q */
+ num_rq = kni_fifo_count(kni->rx_q);
+
+ /* Get the number of free entries in free_q */
+ num_fq = kni_fifo_free_count(kni->free_q);
+
+ /* Calculate the number of entries to dequeue from rx_q */
+ num = min(num_rq, num_fq);
+ num = min(num, (unsigned)MBUF_BURST_SZ);
+
+ /* Return if no entry to dequeue from rx_q */
+ if (num == 0)
+ return;
+
+ /* Burst dequeue mbufs from rx_q */
+ ret = kni_fifo_get(kni->rx_q, (void **)va, num);
+ if (ret == 0)
+ return;
+
+ /* Copy mbufs to sk buffer and then call tx interface */
+ for (i = 0; i < num; i++) {
+ kva = (void *)va[i] - kni->mbuf_va + kni->mbuf_kva;
+ len = kva->data_len;
+ data_kva = kva->buf_addr + kva->data_off - kni->mbuf_va +
+ kni->mbuf_kva;
+
+ skb = dev_alloc_skb(len + 2);
+ if (skb == NULL)
+ KNI_ERR("Out of mem, dropping pkts\n");
+ else {
+ /* Align IP on 16B boundary */
+ skb_reserve(skb, 2);
+ memcpy(skb_put(skb, len), data_kva, len);
+ skb->dev = dev;
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ dev_kfree_skb(skb);
+ }
+
+ /* Simulate real usage, allocate/copy skb twice */
+ skb = dev_alloc_skb(len + 2);
+ if (skb == NULL) {
+ KNI_ERR("Out of mem, dropping pkts\n");
+ kni->stats.rx_dropped++;
+ }
+ else {
+ /* Align IP on 16B boundary */
+ skb_reserve(skb, 2);
+ memcpy(skb_put(skb, len), data_kva, len);
+ skb->dev = dev;
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+ kni->stats.rx_bytes += len;
+ kni->stats.rx_packets++;
+
+ /* call tx interface */
+ kni_net_tx(skb, dev);
+ }
+ }
+
+ /* enqueue all the mbufs from rx_q into free_q */
+ ret = kni_fifo_put(kni->free_q, (void **)&va, num);
+ if (ret != num)
+ /* Failing should not happen */
+ KNI_ERR("Fail to enqueue mbufs into free_q\n");
+}
+
+/* rx interface */
+void
+kni_net_rx(struct kni_dev *kni)
+{
+ /**
+ * It doesn't need to check if it is NULL pointer,
+ * as it has a default value
+ */
+ (*kni_net_rx_func)(kni);
+}
+
+/*
+ * Transmit a packet (called by the kernel)
+ */
+#ifdef RTE_KNI_VHOST
+static int
+kni_net_tx(struct sk_buff *skb, struct net_device *dev)
+{
+ struct kni_dev *kni = netdev_priv(dev);
+
+ dev_kfree_skb(skb);
+ kni->stats.tx_dropped++;
+
+ return NETDEV_TX_OK;
+}
+#else
+static int
+kni_net_tx(struct sk_buff *skb, struct net_device *dev)
+{
+ int len = 0;
+ unsigned ret;
+ struct kni_dev *kni = netdev_priv(dev);
+ struct rte_kni_mbuf *pkt_kva = NULL;
+ struct rte_kni_mbuf *pkt_va = NULL;
+
+ dev->trans_start = jiffies; /* save the timestamp */
+
+ /* Check if the length of skb is less than mbuf size */
+ if (skb->len > kni->mbuf_size)
+ goto drop;
+
+ /**
+ * Check if it has at least one free entry in tx_q and
+ * one entry in alloc_q.
+ */
+ if (kni_fifo_free_count(kni->tx_q) == 0 ||
+ kni_fifo_count(kni->alloc_q) == 0) {
+ /**
+ * If no free entry in tx_q or no entry in alloc_q,
+ * drops skb and goes out.
+ */
+ goto drop;
+ }
+
+ /* dequeue a mbuf from alloc_q */
+ ret = kni_fifo_get(kni->alloc_q, (void **)&pkt_va, 1);
+ if (likely(ret == 1)) {
+ void *data_kva;
+
+ pkt_kva = (void *)pkt_va - kni->mbuf_va + kni->mbuf_kva;
+ data_kva = pkt_kva->buf_addr + pkt_kva->data_off - kni->mbuf_va
+ + kni->mbuf_kva;
+
+ len = skb->len;
+ memcpy(data_kva, skb->data, len);
+ if (unlikely(len < ETH_ZLEN)) {
+ memset(data_kva + len, 0, ETH_ZLEN - len);
+ len = ETH_ZLEN;
+ }
+ pkt_kva->pkt_len = len;
+ pkt_kva->data_len = len;
+
+ /* enqueue mbuf into tx_q */
+ ret = kni_fifo_put(kni->tx_q, (void **)&pkt_va, 1);
+ if (unlikely(ret != 1)) {
+ /* Failing should not happen */
+ KNI_ERR("Fail to enqueue mbuf into tx_q\n");
+ goto drop;
+ }
+ } else {
+ /* Failing should not happen */
+ KNI_ERR("Fail to dequeue mbuf from alloc_q\n");
+ goto drop;
+ }
+
+ /* Free skb and update statistics */
+ dev_kfree_skb(skb);
+ kni->stats.tx_bytes += len;
+ kni->stats.tx_packets++;
+
+ return NETDEV_TX_OK;
+
+drop:
+ /* Free skb and update statistics */
+ dev_kfree_skb(skb);
+ kni->stats.tx_dropped++;
+
+ return NETDEV_TX_OK;
+}
+#endif
+
+/*
+ * Deal with a transmit timeout.
+ */
+static void
+kni_net_tx_timeout (struct net_device *dev)
+{
+ struct kni_dev *kni = netdev_priv(dev);
+
+ KNI_DBG("Transmit timeout at %ld, latency %ld\n", jiffies,
+ jiffies - dev->trans_start);
+
+ kni->stats.tx_errors++;
+ netif_wake_queue(dev);
+ return;
+}
+
+/*
+ * Ioctl commands
+ */
+static int
+kni_net_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+{
+ KNI_DBG("kni_net_ioctl %d\n",
+ ((struct kni_dev *)netdev_priv(dev))->group_id);
+
+ return 0;
+}
+
+static void
+kni_net_set_rx_mode(struct net_device *dev)
+{
+}
+
+static int
+kni_net_change_mtu(struct net_device *dev, int new_mtu)
+{
+ int ret;
+ struct rte_kni_request req;
+ struct kni_dev *kni = netdev_priv(dev);
+
+ KNI_DBG("kni_net_change_mtu new mtu %d to be set\n", new_mtu);
+
+ memset(&req, 0, sizeof(req));
+ req.req_id = RTE_KNI_REQ_CHANGE_MTU;
+ req.new_mtu = new_mtu;
+ ret = kni_net_process_request(kni, &req);
+ if (ret == 0 && req.result == 0)
+ dev->mtu = new_mtu;
+
+ return (ret == 0) ? req.result : ret;
+}
+
+/*
+ * Checks if the user space application provided the resp message
+ */
+void
+kni_net_poll_resp(struct kni_dev *kni)
+{
+ if (kni_fifo_count(kni->resp_q))
+ wake_up_interruptible(&kni->wq);
+}
+
+/*
+ * It can be called to process the request.
+ */
+static int
+kni_net_process_request(struct kni_dev *kni, struct rte_kni_request *req)
+{
+ int ret = -1;
+ void *resp_va;
+ unsigned num;
+ int ret_val;
+
+ if (!kni || !req) {
+ KNI_ERR("No kni instance or request\n");
+ return -EINVAL;
+ }
+
+ mutex_lock(&kni->sync_lock);
+
+ /* Construct data */
+ memcpy(kni->sync_kva, req, sizeof(struct rte_kni_request));
+ num = kni_fifo_put(kni->req_q, &kni->sync_va, 1);
+ if (num < 1) {
+ KNI_ERR("Cannot send to req_q\n");
+ ret = -EBUSY;
+ goto fail;
+ }
+
+ ret_val = wait_event_interruptible_timeout(kni->wq,
+ kni_fifo_count(kni->resp_q), 3 * HZ);
+ if (signal_pending(current) || ret_val <= 0) {
+ ret = -ETIME;
+ goto fail;
+ }
+ num = kni_fifo_get(kni->resp_q, (void **)&resp_va, 1);
+ if (num != 1 || resp_va != kni->sync_va) {
+ /* This should never happen */
+ KNI_ERR("No data in resp_q\n");
+ ret = -ENODATA;
+ goto fail;
+ }
+
+ memcpy(req, kni->sync_kva, sizeof(struct rte_kni_request));
+ ret = 0;
+
+fail:
+ mutex_unlock(&kni->sync_lock);
+ return ret;
+}
+
+/*
+ * Return statistics to the caller
+ */
+static struct net_device_stats *
+kni_net_stats(struct net_device *dev)
+{
+ struct kni_dev *kni = netdev_priv(dev);
+ return &kni->stats;
+}
+
+/*
+ * Fill the eth header
+ */
+static int
+kni_net_header(struct sk_buff *skb, struct net_device *dev,
+ unsigned short type, const void *daddr,
+ const void *saddr, unsigned int len)
+{
+ struct ethhdr *eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
+
+ memcpy(eth->h_source, saddr ? saddr : dev->dev_addr, dev->addr_len);
+ memcpy(eth->h_dest, daddr ? daddr : dev->dev_addr, dev->addr_len);
+ eth->h_proto = htons(type);
+
+ return dev->hard_header_len;
+}
+
+
+/*
+ * Re-fill the eth header
+ */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 1, 0))
+static int
+kni_net_rebuild_header(struct sk_buff *skb)
+{
+ struct net_device *dev = skb->dev;
+ struct ethhdr *eth = (struct ethhdr *) skb->data;
+
+ memcpy(eth->h_source, dev->dev_addr, dev->addr_len);
+ memcpy(eth->h_dest, dev->dev_addr, dev->addr_len);
+
+ return 0;
+}
+#endif /* < 4.1.0 */
+
+/**
+ * kni_net_set_mac - Change the Ethernet Address of the KNI NIC
+ * @netdev: network interface device structure
+ * @p: pointer to an address structure
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int kni_net_set_mac(struct net_device *netdev, void *p)
+{
+ struct sockaddr *addr = p;
+ if (!is_valid_ether_addr((unsigned char *)(addr->sa_data)))
+ return -EADDRNOTAVAIL;
+ memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
+ return 0;
+}
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0))
+static int kni_net_change_carrier(struct net_device *dev, bool new_carrier)
+{
+ if (new_carrier)
+ netif_carrier_on(dev);
+ else
+ netif_carrier_off(dev);
+ return 0;
+}
+#endif
+
+static const struct header_ops kni_net_header_ops = {
+ .create = kni_net_header,
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 1, 0))
+ .rebuild = kni_net_rebuild_header,
+#endif /* < 4.1.0 */
+ .cache = NULL, /* disable caching */
+};
+
+static const struct net_device_ops kni_net_netdev_ops = {
+ .ndo_open = kni_net_open,
+ .ndo_stop = kni_net_release,
+ .ndo_set_config = kni_net_config,
+ .ndo_start_xmit = kni_net_tx,
+ .ndo_change_mtu = kni_net_change_mtu,
+ .ndo_do_ioctl = kni_net_ioctl,
+ .ndo_set_rx_mode = kni_net_set_rx_mode,
+ .ndo_get_stats = kni_net_stats,
+ .ndo_tx_timeout = kni_net_tx_timeout,
+ .ndo_set_mac_address = kni_net_set_mac,
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0))
+ .ndo_change_carrier = kni_net_change_carrier,
+#endif
+};
+
+void
+kni_net_init(struct net_device *dev)
+{
+ struct kni_dev *kni = netdev_priv(dev);
+
+ KNI_DBG("kni_net_init\n");
+
+ init_waitqueue_head(&kni->wq);
+ mutex_init(&kni->sync_lock);
+
+ ether_setup(dev); /* assign some of the fields */
+ dev->netdev_ops = &kni_net_netdev_ops;
+ dev->header_ops = &kni_net_header_ops;
+ dev->watchdog_timeo = WD_TIMEOUT;
+}
+
+void
+kni_net_config_lo_mode(char *lo_str)
+{
+ if (!lo_str) {
+ KNI_PRINT("loopback disabled");
+ return;
+ }
+
+ if (!strcmp(lo_str, "lo_mode_none"))
+ KNI_PRINT("loopback disabled");
+ else if (!strcmp(lo_str, "lo_mode_fifo")) {
+ KNI_PRINT("loopback mode=lo_mode_fifo enabled");
+ kni_net_rx_func = kni_net_rx_lo_fifo;
+ } else if (!strcmp(lo_str, "lo_mode_fifo_skb")) {
+ KNI_PRINT("loopback mode=lo_mode_fifo_skb enabled");
+ kni_net_rx_func = kni_net_rx_lo_fifo_skb;
+ } else
+ KNI_PRINT("Incognizant parameter, loopback disabled");
+}
diff --git a/lib/librte_eal/linuxapp/kni/kni_vhost.c b/lib/librte_eal/linuxapp/kni/kni_vhost.c
new file mode 100644
index 00000000..a3ca8499
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/kni_vhost.c
@@ -0,0 +1,857 @@
+/*-
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ * The full GNU General Public License is included in this distribution
+ * in the file called LICENSE.GPL.
+ *
+ * Contact Information:
+ * Intel Corporation
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <net/sock.h>
+#include <linux/virtio_net.h>
+#include <linux/wait.h>
+#include <linux/mm.h>
+#include <linux/nsproxy.h>
+#include <linux/sched.h>
+#include <linux/if_tun.h>
+#include <linux/version.h>
+
+#include "compat.h"
+#include "kni_dev.h"
+#include "kni_fifo.h"
+
+#define RX_BURST_SZ 4
+
+extern void put_unused_fd(unsigned int fd);
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,7,0)
+extern struct file*
+sock_alloc_file(struct socket *sock,
+ int flags, const char *dname);
+
+extern int get_unused_fd_flags(unsigned flags);
+
+extern void fd_install(unsigned int fd, struct file *file);
+
+static int kni_sock_map_fd(struct socket *sock)
+{
+ struct file *file;
+ int fd = get_unused_fd_flags(0);
+ if (fd < 0)
+ return fd;
+
+ file = sock_alloc_file(sock, 0, NULL);
+ if (IS_ERR(file)) {
+ put_unused_fd(fd);
+ return PTR_ERR(file);
+ }
+ fd_install(fd, file);
+ return fd;
+}
+#else
+#define kni_sock_map_fd(s) sock_map_fd(s, 0)
+#endif
+
+static struct proto kni_raw_proto = {
+ .name = "kni_vhost",
+ .owner = THIS_MODULE,
+ .obj_size = sizeof(struct kni_vhost_queue),
+};
+
+static inline int
+kni_vhost_net_tx(struct kni_dev *kni, struct msghdr *m,
+ unsigned offset, unsigned len)
+{
+ struct rte_kni_mbuf *pkt_kva = NULL;
+ struct rte_kni_mbuf *pkt_va = NULL;
+ int ret;
+
+ KNI_DBG_TX("tx offset=%d, len=%d, iovlen=%d\n",
+#ifdef HAVE_IOV_ITER_MSGHDR
+ offset, len, (int)m->msg_iter.iov->iov_len);
+#else
+ offset, len, (int)m->msg_iov->iov_len);
+#endif
+
+ /**
+ * Check if it has at least one free entry in tx_q and
+ * one entry in alloc_q.
+ */
+ if (kni_fifo_free_count(kni->tx_q) == 0 ||
+ kni_fifo_count(kni->alloc_q) == 0) {
+ /**
+ * If no free entry in tx_q or no entry in alloc_q,
+ * drops skb and goes out.
+ */
+ goto drop;
+ }
+
+ /* dequeue a mbuf from alloc_q */
+ ret = kni_fifo_get(kni->alloc_q, (void **)&pkt_va, 1);
+ if (likely(ret == 1)) {
+ void *data_kva;
+
+ pkt_kva = (void *)pkt_va - kni->mbuf_va + kni->mbuf_kva;
+ data_kva = pkt_kva->buf_addr + pkt_kva->data_off
+ - kni->mbuf_va + kni->mbuf_kva;
+
+#ifdef HAVE_IOV_ITER_MSGHDR
+ copy_from_iter(data_kva, len, &m->msg_iter);
+#else
+ memcpy_fromiovecend(data_kva, m->msg_iov, offset, len);
+#endif
+
+ if (unlikely(len < ETH_ZLEN)) {
+ memset(data_kva + len, 0, ETH_ZLEN - len);
+ len = ETH_ZLEN;
+ }
+ pkt_kva->pkt_len = len;
+ pkt_kva->data_len = len;
+
+ /* enqueue mbuf into tx_q */
+ ret = kni_fifo_put(kni->tx_q, (void **)&pkt_va, 1);
+ if (unlikely(ret != 1)) {
+ /* Failing should not happen */
+ KNI_ERR("Fail to enqueue mbuf into tx_q\n");
+ goto drop;
+ }
+ } else {
+ /* Failing should not happen */
+ KNI_ERR("Fail to dequeue mbuf from alloc_q\n");
+ goto drop;
+ }
+
+ /* update statistics */
+ kni->stats.tx_bytes += len;
+ kni->stats.tx_packets++;
+
+ return 0;
+
+drop:
+ /* update statistics */
+ kni->stats.tx_dropped++;
+
+ return 0;
+}
+
+static inline int
+kni_vhost_net_rx(struct kni_dev *kni, struct msghdr *m,
+ unsigned offset, unsigned len)
+{
+ uint32_t pkt_len;
+ struct rte_kni_mbuf *kva;
+ struct rte_kni_mbuf *va;
+ void * data_kva;
+ struct sk_buff *skb;
+ struct kni_vhost_queue *q = kni->vhost_queue;
+
+ if (unlikely(q == NULL))
+ return 0;
+
+ /* ensure at least one entry in free_q */
+ if (unlikely(kni_fifo_free_count(kni->free_q) == 0))
+ return 0;
+
+ skb = skb_dequeue(&q->sk.sk_receive_queue);
+ if (unlikely(skb == NULL))
+ return 0;
+
+ kva = (struct rte_kni_mbuf*)skb->data;
+
+ /* free skb to cache */
+ skb->data = NULL;
+ if (unlikely(1 != kni_fifo_put(q->fifo, (void **)&skb, 1)))
+ /* Failing should not happen */
+ KNI_ERR("Fail to enqueue entries into rx cache fifo\n");
+
+ pkt_len = kva->data_len;
+ if (unlikely(pkt_len > len))
+ goto drop;
+
+ KNI_DBG_RX("rx offset=%d, len=%d, pkt_len=%d, iovlen=%d\n",
+#ifdef HAVE_IOV_ITER_MSGHDR
+ offset, len, pkt_len, (int)m->msg_iter.iov->iov_len);
+#else
+ offset, len, pkt_len, (int)m->msg_iov->iov_len);
+#endif
+
+ data_kva = kva->buf_addr + kva->data_off - kni->mbuf_va + kni->mbuf_kva;
+#ifdef HAVE_IOV_ITER_MSGHDR
+ if (unlikely(copy_to_iter(data_kva, pkt_len, &m->msg_iter)))
+#else
+ if (unlikely(memcpy_toiovecend(m->msg_iov, data_kva, offset, pkt_len)))
+#endif
+ goto drop;
+
+ /* Update statistics */
+ kni->stats.rx_bytes += pkt_len;
+ kni->stats.rx_packets++;
+
+ /* enqueue mbufs into free_q */
+ va = (void*)kva - kni->mbuf_kva + kni->mbuf_va;
+ if (unlikely(1 != kni_fifo_put(kni->free_q, (void **)&va, 1)))
+ /* Failing should not happen */
+ KNI_ERR("Fail to enqueue entries into free_q\n");
+
+ KNI_DBG_RX("receive done %d\n", pkt_len);
+
+ return pkt_len;
+
+drop:
+ /* Update drop statistics */
+ kni->stats.rx_dropped++;
+
+ return 0;
+}
+
+static unsigned int
+kni_sock_poll(struct file *file, struct socket *sock, poll_table * wait)
+{
+ struct kni_vhost_queue *q =
+ container_of(sock->sk, struct kni_vhost_queue, sk);
+ struct kni_dev *kni;
+ unsigned int mask = 0;
+
+ if (unlikely(q == NULL || q->kni == NULL))
+ return POLLERR;
+
+ kni = q->kni;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35)
+ KNI_DBG("start kni_poll on group %d, wq 0x%16llx\n",
+ kni->group_id, (uint64_t)sock->wq);
+#else
+ KNI_DBG("start kni_poll on group %d, wait at 0x%16llx\n",
+ kni->group_id, (uint64_t)&sock->wait);
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35)
+ poll_wait(file, &sock->wq->wait, wait);
+#else
+ poll_wait(file, &sock->wait, wait);
+#endif
+
+ if (kni_fifo_count(kni->rx_q) > 0)
+ mask |= POLLIN | POLLRDNORM;
+
+ if (sock_writeable(&q->sk) ||
+#ifdef SOCKWQ_ASYNC_NOSPACE
+ (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &q->sock->flags) &&
+#else
+ (!test_and_set_bit(SOCK_ASYNC_NOSPACE, &q->sock->flags) &&
+#endif
+ sock_writeable(&q->sk)))
+ mask |= POLLOUT | POLLWRNORM;
+
+ return mask;
+}
+
+static inline void
+kni_vhost_enqueue(struct kni_dev *kni, struct kni_vhost_queue *q,
+ struct sk_buff *skb, struct rte_kni_mbuf *va)
+{
+ struct rte_kni_mbuf *kva;
+
+ kva = (void *)(va) - kni->mbuf_va + kni->mbuf_kva;
+ (skb)->data = (unsigned char*)kva;
+ (skb)->len = kva->data_len;
+ skb_queue_tail(&q->sk.sk_receive_queue, skb);
+}
+
+static inline void
+kni_vhost_enqueue_burst(struct kni_dev *kni, struct kni_vhost_queue *q,
+ struct sk_buff **skb, struct rte_kni_mbuf **va)
+{
+ int i;
+ for (i = 0; i < RX_BURST_SZ; skb++, va++, i++)
+ kni_vhost_enqueue(kni, q, *skb, *va);
+}
+
+int
+kni_chk_vhost_rx(struct kni_dev *kni)
+{
+ struct kni_vhost_queue *q = kni->vhost_queue;
+ unsigned nb_in, nb_mbuf, nb_skb;
+ const unsigned BURST_MASK = RX_BURST_SZ - 1;
+ unsigned nb_burst, nb_backlog, i;
+ struct sk_buff *skb[RX_BURST_SZ];
+ struct rte_kni_mbuf *va[RX_BURST_SZ];
+
+ if (unlikely(BE_STOP & kni->vq_status)) {
+ kni->vq_status |= BE_FINISH;
+ return 0;
+ }
+
+ if (unlikely(q == NULL))
+ return 0;
+
+ nb_skb = kni_fifo_count(q->fifo);
+ nb_mbuf = kni_fifo_count(kni->rx_q);
+
+ nb_in = min(nb_mbuf, nb_skb);
+ nb_in = min(nb_in, (unsigned)RX_BURST_SZ);
+ nb_burst = (nb_in & ~BURST_MASK);
+ nb_backlog = (nb_in & BURST_MASK);
+
+ /* enqueue skb_queue per BURST_SIZE bulk */
+ if (0 != nb_burst) {
+ if (unlikely(RX_BURST_SZ != kni_fifo_get(
+ kni->rx_q, (void **)&va,
+ RX_BURST_SZ)))
+ goto except;
+
+ if (unlikely(RX_BURST_SZ != kni_fifo_get(
+ q->fifo, (void **)&skb,
+ RX_BURST_SZ)))
+ goto except;
+
+ kni_vhost_enqueue_burst(kni, q, skb, va);
+ }
+
+ /* all leftover, do one by one */
+ for (i = 0; i < nb_backlog; ++i) {
+ if (unlikely(1 != kni_fifo_get(
+ kni->rx_q,(void **)&va, 1)))
+ goto except;
+
+ if (unlikely(1 != kni_fifo_get(
+ q->fifo, (void **)&skb, 1)))
+ goto except;
+
+ kni_vhost_enqueue(kni, q, *skb, *va);
+ }
+
+ /* Ondemand wake up */
+ if ((nb_in == RX_BURST_SZ) || (nb_skb == 0) ||
+ ((nb_mbuf < RX_BURST_SZ) && (nb_mbuf != 0))) {
+ wake_up_interruptible_poll(sk_sleep(&q->sk),
+ POLLIN | POLLRDNORM | POLLRDBAND);
+ KNI_DBG_RX("RX CHK KICK nb_mbuf %d, nb_skb %d, nb_in %d\n",
+ nb_mbuf, nb_skb, nb_in);
+ }
+
+ return 0;
+
+except:
+ /* Failing should not happen */
+ KNI_ERR("Fail to enqueue fifo, it shouldn't happen \n");
+ BUG_ON(1);
+
+ return 0;
+}
+
+static int
+#ifdef HAVE_KIOCB_MSG_PARAM
+kni_sock_sndmsg(struct kiocb *iocb, struct socket *sock,
+ struct msghdr *m, size_t total_len)
+#else
+kni_sock_sndmsg(struct socket *sock,
+ struct msghdr *m, size_t total_len)
+#endif /* HAVE_KIOCB_MSG_PARAM */
+{
+ struct kni_vhost_queue *q =
+ container_of(sock->sk, struct kni_vhost_queue, sk);
+ int vnet_hdr_len = 0;
+ unsigned long len = total_len;
+
+ if (unlikely(q == NULL || q->kni == NULL))
+ return 0;
+
+ KNI_DBG_TX("kni_sndmsg len %ld, flags 0x%08x, nb_iov %d\n",
+#ifdef HAVE_IOV_ITER_MSGHDR
+ len, q->flags, (int)m->msg_iter.iov->iov_len);
+#else
+ len, q->flags, (int)m->msg_iovlen);
+#endif
+
+#ifdef RTE_KNI_VHOST_VNET_HDR_EN
+ if (likely(q->flags & IFF_VNET_HDR)) {
+ vnet_hdr_len = q->vnet_hdr_sz;
+ if (unlikely(len < vnet_hdr_len))
+ return -EINVAL;
+ len -= vnet_hdr_len;
+ }
+#endif
+
+ if (unlikely(len < ETH_HLEN + q->vnet_hdr_sz))
+ return -EINVAL;
+
+ return kni_vhost_net_tx(q->kni, m, vnet_hdr_len, len);
+}
+
+static int
+#ifdef HAVE_KIOCB_MSG_PARAM
+kni_sock_rcvmsg(struct kiocb *iocb, struct socket *sock,
+ struct msghdr *m, size_t len, int flags)
+#else
+kni_sock_rcvmsg(struct socket *sock,
+ struct msghdr *m, size_t len, int flags)
+#endif /* HAVE_KIOCB_MSG_PARAM */
+{
+ int vnet_hdr_len = 0;
+ int pkt_len = 0;
+ struct kni_vhost_queue *q =
+ container_of(sock->sk, struct kni_vhost_queue, sk);
+ static struct virtio_net_hdr
+ __attribute__ ((unused)) vnet_hdr = {
+ .flags = 0,
+ .gso_type = VIRTIO_NET_HDR_GSO_NONE
+ };
+
+ if (unlikely(q == NULL || q->kni == NULL))
+ return 0;
+
+#ifdef RTE_KNI_VHOST_VNET_HDR_EN
+ if (likely(q->flags & IFF_VNET_HDR)) {
+ vnet_hdr_len = q->vnet_hdr_sz;
+ if ((len -= vnet_hdr_len) < 0)
+ return -EINVAL;
+ }
+#endif
+
+ if (unlikely(0 == (pkt_len = kni_vhost_net_rx(q->kni,
+ m, vnet_hdr_len, len))))
+ return 0;
+
+#ifdef RTE_KNI_VHOST_VNET_HDR_EN
+ /* no need to copy hdr when no pkt received */
+#ifdef HAVE_IOV_ITER_MSGHDR
+ if (unlikely(copy_to_iter((void *)&vnet_hdr, vnet_hdr_len,
+ &m->msg_iter)))
+#else
+ if (unlikely(memcpy_toiovecend(m->msg_iov,
+ (void *)&vnet_hdr, 0, vnet_hdr_len)))
+#endif /* HAVE_IOV_ITER_MSGHDR */
+ return -EFAULT;
+#endif /* RTE_KNI_VHOST_VNET_HDR_EN */
+ KNI_DBG_RX("kni_rcvmsg expect_len %ld, flags 0x%08x, pkt_len %d\n",
+ (unsigned long)len, q->flags, pkt_len);
+
+ return pkt_len + vnet_hdr_len;
+}
+
+/* dummy tap like ioctl */
+static int
+kni_sock_ioctl(struct socket *sock, unsigned int cmd,
+ unsigned long arg)
+{
+ void __user *argp = (void __user *)arg;
+ struct ifreq __user *ifr = argp;
+ unsigned int __user *up = argp;
+ struct kni_vhost_queue *q =
+ container_of(sock->sk, struct kni_vhost_queue, sk);
+ struct kni_dev *kni;
+ unsigned int u;
+ int __user *sp = argp;
+ int s;
+ int ret;
+
+ KNI_DBG("tap ioctl cmd 0x%08x\n", cmd);
+
+ switch (cmd) {
+ case TUNSETIFF:
+ KNI_DBG("TUNSETIFF\n");
+ /* ignore the name, just look at flags */
+ if (get_user(u, &ifr->ifr_flags))
+ return -EFAULT;
+
+ ret = 0;
+ if ((u & ~IFF_VNET_HDR) != (IFF_NO_PI | IFF_TAP))
+ ret = -EINVAL;
+ else
+ q->flags = u;
+
+ return ret;
+
+ case TUNGETIFF:
+ KNI_DBG("TUNGETIFF\n");
+ rcu_read_lock_bh();
+ kni = rcu_dereference_bh(q->kni);
+ if (kni)
+ dev_hold(kni->net_dev);
+ rcu_read_unlock_bh();
+
+ if (!kni)
+ return -ENOLINK;
+
+ ret = 0;
+ if (copy_to_user(&ifr->ifr_name, kni->net_dev->name, IFNAMSIZ) ||
+ put_user(q->flags, &ifr->ifr_flags))
+ ret = -EFAULT;
+ dev_put(kni->net_dev);
+ return ret;
+
+ case TUNGETFEATURES:
+ KNI_DBG("TUNGETFEATURES\n");
+ u = IFF_TAP | IFF_NO_PI;
+#ifdef RTE_KNI_VHOST_VNET_HDR_EN
+ u |= IFF_VNET_HDR;
+#endif
+ if (put_user(u, up))
+ return -EFAULT;
+ return 0;
+
+ case TUNSETSNDBUF:
+ KNI_DBG("TUNSETSNDBUF\n");
+ if (get_user(u, up))
+ return -EFAULT;
+
+ q->sk.sk_sndbuf = u;
+ return 0;
+
+ case TUNGETVNETHDRSZ:
+ s = q->vnet_hdr_sz;
+ if (put_user(s, sp))
+ return -EFAULT;
+ KNI_DBG("TUNGETVNETHDRSZ %d\n", s);
+ return 0;
+
+ case TUNSETVNETHDRSZ:
+ if (get_user(s, sp))
+ return -EFAULT;
+ if (s < (int)sizeof(struct virtio_net_hdr))
+ return -EINVAL;
+
+ KNI_DBG("TUNSETVNETHDRSZ %d\n", s);
+ q->vnet_hdr_sz = s;
+ return 0;
+
+ case TUNSETOFFLOAD:
+ KNI_DBG("TUNSETOFFLOAD %lx\n", arg);
+#ifdef RTE_KNI_VHOST_VNET_HDR_EN
+ /* not support any offload yet */
+ if (!(q->flags & IFF_VNET_HDR))
+ return -EINVAL;
+
+ return 0;
+#else
+ return -EINVAL;
+#endif
+
+ default:
+ KNI_DBG("NOT SUPPORT\n");
+ return -EINVAL;
+ }
+}
+
+static int
+kni_sock_compat_ioctl(struct socket *sock, unsigned int cmd,
+ unsigned long arg)
+{
+ /* 32 bits app on 64 bits OS to be supported later */
+ KNI_PRINT("Not implemented.\n");
+
+ return -EINVAL;
+}
+
+#define KNI_VHOST_WAIT_WQ_SAFE() \
+do { \
+ while ((BE_FINISH | BE_STOP) == kni->vq_status) \
+ msleep(1); \
+}while(0) \
+
+
+static int
+kni_sock_release(struct socket *sock)
+{
+ struct kni_vhost_queue *q =
+ container_of(sock->sk, struct kni_vhost_queue, sk);
+ struct kni_dev *kni;
+
+ if (q == NULL)
+ return 0;
+
+ if (NULL != (kni = q->kni)) {
+ kni->vq_status = BE_STOP;
+ KNI_VHOST_WAIT_WQ_SAFE();
+ kni->vhost_queue = NULL;
+ q->kni = NULL;
+ }
+
+ if (q->sockfd != -1)
+ q->sockfd = -1;
+
+ sk_set_socket(&q->sk, NULL);
+ sock->sk = NULL;
+
+ sock_put(&q->sk);
+
+ KNI_DBG("dummy sock release done\n");
+
+ return 0;
+}
+
+int
+kni_sock_getname (struct socket *sock,
+ struct sockaddr *addr,
+ int *sockaddr_len, int peer)
+{
+ KNI_DBG("dummy sock getname\n");
+ ((struct sockaddr_ll*)addr)->sll_family = AF_PACKET;
+ return 0;
+}
+
+static const struct proto_ops kni_socket_ops = {
+ .getname = kni_sock_getname,
+ .sendmsg = kni_sock_sndmsg,
+ .recvmsg = kni_sock_rcvmsg,
+ .release = kni_sock_release,
+ .poll = kni_sock_poll,
+ .ioctl = kni_sock_ioctl,
+ .compat_ioctl = kni_sock_compat_ioctl,
+};
+
+static void
+kni_sk_write_space(struct sock *sk)
+{
+ wait_queue_head_t *wqueue;
+
+ if (!sock_writeable(sk) ||
+#ifdef SOCKWQ_ASYNC_NOSPACE
+ !test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags))
+#else
+ !test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags))
+#endif
+ return;
+ wqueue = sk_sleep(sk);
+ if (wqueue && waitqueue_active(wqueue))
+ wake_up_interruptible_poll(
+ wqueue, POLLOUT | POLLWRNORM | POLLWRBAND);
+}
+
+static void
+kni_sk_destruct(struct sock *sk)
+{
+ struct kni_vhost_queue *q =
+ container_of(sk, struct kni_vhost_queue, sk);
+
+ if (!q)
+ return;
+
+ /* make sure there's no packet in buffer */
+ while (skb_dequeue(&sk->sk_receive_queue) != NULL)
+ ;
+
+ mb();
+
+ if (q->fifo != NULL) {
+ kfree(q->fifo);
+ q->fifo = NULL;
+ }
+
+ if (q->cache != NULL) {
+ kfree(q->cache);
+ q->cache = NULL;
+ }
+}
+
+static int
+kni_vhost_backend_init(struct kni_dev *kni)
+{
+ struct kni_vhost_queue *q;
+ struct net *net = current->nsproxy->net_ns;
+ int err, i, sockfd;
+ struct rte_kni_fifo *fifo;
+ struct sk_buff *elem;
+
+ if (kni->vhost_queue != NULL)
+ return -1;
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0)
+ q = (struct kni_vhost_queue *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL,
+ &kni_raw_proto, 0);
+#else
+ q = (struct kni_vhost_queue *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL,
+ &kni_raw_proto);
+#endif
+ if (!q)
+ return -ENOMEM;
+
+ err = sock_create_lite(AF_UNSPEC, SOCK_RAW, IPPROTO_RAW, &q->sock);
+ if (err)
+ goto free_sk;
+
+ sockfd = kni_sock_map_fd(q->sock);
+ if (sockfd < 0) {
+ err = sockfd;
+ goto free_sock;
+ }
+
+ /* cache init */
+ q->cache = kzalloc(RTE_KNI_VHOST_MAX_CACHE_SIZE * sizeof(struct sk_buff),
+ GFP_KERNEL);
+ if (!q->cache)
+ goto free_fd;
+
+ fifo = kzalloc(RTE_KNI_VHOST_MAX_CACHE_SIZE * sizeof(void *)
+ + sizeof(struct rte_kni_fifo), GFP_KERNEL);
+ if (!fifo)
+ goto free_cache;
+
+ kni_fifo_init(fifo, RTE_KNI_VHOST_MAX_CACHE_SIZE);
+
+ for (i = 0; i < RTE_KNI_VHOST_MAX_CACHE_SIZE; i++) {
+ elem = &q->cache[i];
+ kni_fifo_put(fifo, (void**)&elem, 1);
+ }
+ q->fifo = fifo;
+
+ /* store sockfd in vhost_queue */
+ q->sockfd = sockfd;
+
+ /* init socket */
+ q->sock->type = SOCK_RAW;
+ q->sock->state = SS_CONNECTED;
+ q->sock->ops = &kni_socket_ops;
+ sock_init_data(q->sock, &q->sk);
+
+ /* init sock data */
+ q->sk.sk_write_space = kni_sk_write_space;
+ q->sk.sk_destruct = kni_sk_destruct;
+ q->flags = IFF_NO_PI | IFF_TAP;
+ q->vnet_hdr_sz = sizeof(struct virtio_net_hdr);
+#ifdef RTE_KNI_VHOST_VNET_HDR_EN
+ q->flags |= IFF_VNET_HDR;
+#endif
+
+ /* bind kni_dev with vhost_queue */
+ q->kni = kni;
+ kni->vhost_queue = q;
+
+ wmb();
+
+ kni->vq_status = BE_START;
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35)
+ KNI_DBG("backend init sockfd=%d, sock->wq=0x%16llx,"
+ "sk->sk_wq=0x%16llx",
+ q->sockfd, (uint64_t)q->sock->wq,
+ (uint64_t)q->sk.sk_wq);
+#else
+ KNI_DBG("backend init sockfd=%d, sock->wait at 0x%16llx,"
+ "sk->sk_sleep=0x%16llx",
+ q->sockfd, (uint64_t)&q->sock->wait,
+ (uint64_t)q->sk.sk_sleep);
+#endif
+
+ return 0;
+
+free_cache:
+ kfree(q->cache);
+ q->cache = NULL;
+
+free_fd:
+ put_unused_fd(sockfd);
+
+free_sock:
+ q->kni = NULL;
+ kni->vhost_queue = NULL;
+ kni->vq_status |= BE_FINISH;
+ sock_release(q->sock);
+ q->sock->ops = NULL;
+ q->sock = NULL;
+
+free_sk:
+ sk_free((struct sock*)q);
+
+ return err;
+}
+
+/* kni vhost sock sysfs */
+static ssize_t
+show_sock_fd(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct net_device *net_dev = container_of(dev, struct net_device, dev);
+ struct kni_dev *kni = netdev_priv(net_dev);
+ int sockfd = -1;
+ if (kni->vhost_queue != NULL)
+ sockfd = kni->vhost_queue->sockfd;
+ return snprintf(buf, 10, "%d\n", sockfd);
+}
+
+static ssize_t
+show_sock_en(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct net_device *net_dev = container_of(dev, struct net_device, dev);
+ struct kni_dev *kni = netdev_priv(net_dev);
+ return snprintf(buf, 10, "%u\n", (kni->vhost_queue == NULL ? 0 : 1));
+}
+
+static ssize_t
+set_sock_en(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct net_device *net_dev = container_of(dev, struct net_device, dev);
+ struct kni_dev *kni = netdev_priv(net_dev);
+ unsigned long en;
+ int err = 0;
+
+ if (0 != kstrtoul(buf, 0, &en))
+ return -EINVAL;
+
+ if (en)
+ err = kni_vhost_backend_init(kni);
+
+ return err ? err : count;
+}
+
+static DEVICE_ATTR(sock_fd, S_IRUGO | S_IRUSR, show_sock_fd, NULL);
+static DEVICE_ATTR(sock_en, S_IRUGO | S_IWUSR, show_sock_en, set_sock_en);
+static struct attribute *dev_attrs[] = {
+ &dev_attr_sock_fd.attr,
+ &dev_attr_sock_en.attr,
+ NULL,
+};
+
+static const struct attribute_group dev_attr_grp = {
+ .attrs = dev_attrs,
+};
+
+int
+kni_vhost_backend_release(struct kni_dev *kni)
+{
+ struct kni_vhost_queue *q = kni->vhost_queue;
+
+ if (q == NULL)
+ return 0;
+
+ /* dettach from kni */
+ q->kni = NULL;
+
+ KNI_DBG("release backend done\n");
+
+ return 0;
+}
+
+int
+kni_vhost_init(struct kni_dev *kni)
+{
+ struct net_device *dev = kni->net_dev;
+
+ if (sysfs_create_group(&dev->dev.kobj, &dev_attr_grp))
+ sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp);
+
+ kni->vq_status = BE_STOP;
+
+ KNI_DBG("kni_vhost_init done\n");
+
+ return 0;
+}
diff --git a/lib/librte_eal/linuxapp/xen_dom0/Makefile b/lib/librte_eal/linuxapp/xen_dom0/Makefile
new file mode 100644
index 00000000..9d22fb97
--- /dev/null
+++ b/lib/librte_eal/linuxapp/xen_dom0/Makefile
@@ -0,0 +1,56 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# module name and path
+#
+MODULE = rte_dom0_mm
+
+#
+# CFLAGS
+#
+MODULE_CFLAGS += -I$(SRCDIR) --param max-inline-insns-single=50
+MODULE_CFLAGS += -I$(RTE_OUTPUT)/include
+MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h
+MODULE_CFLAGS += -Wall -Werror
+
+# this lib needs main eal
+DEPDIRS-y += lib/librte_eal/linuxapp/eal
+
+#
+# all source are stored in SRCS-y
+#
+
+SRCS-y += dom0_mm_misc.c
+
+include $(RTE_SDK)/mk/rte.module.mk
diff --git a/lib/librte_eal/linuxapp/xen_dom0/compat.h b/lib/librte_eal/linuxapp/xen_dom0/compat.h
new file mode 100644
index 00000000..e6eb97f2
--- /dev/null
+++ b/lib/librte_eal/linuxapp/xen_dom0/compat.h
@@ -0,0 +1,15 @@
+/*
+ * Minimal wrappers to allow compiling xen_dom0 on older kernels.
+ */
+
+#ifndef RHEL_RELEASE_VERSION
+#define RHEL_RELEASE_VERSION(a, b) (((a) << 8) + (b))
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39) && \
+ (!(defined(RHEL_RELEASE_CODE) && \
+ RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 4)))
+
+#define kstrtoul strict_strtoul
+
+#endif /* < 2.6.39 */
diff --git a/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_dev.h b/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_dev.h
new file mode 100644
index 00000000..9d5ffb22
--- /dev/null
+++ b/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_dev.h
@@ -0,0 +1,107 @@
+/*-
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ * The full GNU General Public License is included in this distribution
+ * in the file called LICENSE.GPL.
+ *
+ * Contact Information:
+ * Intel Corporation
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#ifndef _DOM0_MM_DEV_H_
+#define _DOM0_MM_DEV_H_
+
+#include <linux/wait.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <exec-env/rte_dom0_common.h>
+
+#define NUM_MEM_CTX 256 /**< Maximum number of memory context*/
+#define MAX_EXCHANGE_FAIL_TIME 5 /**< Maximum times of allowing exchange fail .*/
+#define MAX_MEMBLOCK_SIZE (2 * DOM0_MEMBLOCK_SIZE)
+#define MAX_NUM_ORDER (DOM0_CONTIG_NUM_ORDER + 1)
+#define SIZE_PER_BLOCK 2 /**< Size of memory block (2MB).*/
+
+/**
+ * A structure describing the private information for a dom0 device.
+ */
+struct dom0_mm_dev {
+ struct miscdevice miscdev;
+ uint8_t fail_times;
+ uint32_t used_memsize;
+ uint32_t num_mem_ctx;
+ uint32_t config_memsize;
+ uint32_t num_bigblock;
+ struct dom0_mm_data *mm_data[NUM_MEM_CTX];
+ struct mutex data_lock;
+};
+
+struct dom0_mm_data{
+ uint32_t refcnt;
+ uint32_t num_memseg; /**< Number of memory segment. */
+ uint32_t mem_size; /**< Size of requesting memory. */
+
+ char name[DOM0_NAME_MAX];
+
+ /** Store global memory block IDs used by an instance */
+ uint32_t block_num[DOM0_NUM_MEMBLOCK];
+
+ /** Store memory block information.*/
+ struct memblock_info block_info[DOM0_NUM_MEMBLOCK];
+
+ /** Store memory segment information.*/
+ struct memseg_info seg_info[DOM0_NUM_MEMSEG];
+};
+
+#define XEN_ERR(args...) printk(KERN_DEBUG "XEN_DOM0: Error: " args)
+#define XEN_PRINT(args...) printk(KERN_DEBUG "XEN_DOM0: " args)
+#endif
diff --git a/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_misc.c b/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_misc.c
new file mode 100644
index 00000000..79630bad
--- /dev/null
+++ b/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_misc.c
@@ -0,0 +1,780 @@
+/*-
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ * The full GNU General Public License is included in this distribution
+ * in the file called LICENSE.GPL.
+ *
+ * Contact Information:
+ * Intel Corporation
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/miscdevice.h>
+#include <linux/fs.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <linux/version.h>
+
+#include <xen/xen.h>
+#include <xen/page.h>
+#include <xen/xen-ops.h>
+#include <xen/interface/memory.h>
+
+#include <exec-env/rte_dom0_common.h>
+
+#include "compat.h"
+#include "dom0_mm_dev.h"
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Kernel Module for supporting DPDK running on Xen Dom0");
+
+static struct dom0_mm_dev dom0_dev;
+static struct kobject *dom0_kobj = NULL;
+
+static struct memblock_info *rsv_mm_info;
+
+/* Default configuration for reserved memory size(2048 MB). */
+static uint32_t rsv_memsize = 2048;
+
+static int dom0_open(struct inode *inode, struct file *file);
+static int dom0_release(struct inode *inode, struct file *file);
+static int dom0_ioctl(struct file *file, unsigned int ioctl_num,
+ unsigned long ioctl_param);
+static int dom0_mmap(struct file *file, struct vm_area_struct *vma);
+static int dom0_memory_free(uint32_t size);
+static int dom0_memory_release(struct dom0_mm_data *mm_data);
+
+static const struct file_operations data_fops = {
+ .owner = THIS_MODULE,
+ .open = dom0_open,
+ .release = dom0_release,
+ .mmap = dom0_mmap,
+ .unlocked_ioctl = (void *)dom0_ioctl,
+};
+
+static ssize_t
+show_memsize_rsvd(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return snprintf(buf, 10, "%u\n", dom0_dev.used_memsize);
+}
+
+static ssize_t
+show_memsize(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return snprintf(buf, 10, "%u\n", dom0_dev.config_memsize);
+}
+
+static ssize_t
+store_memsize(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ int err = 0;
+ unsigned long mem_size;
+
+ if (0 != kstrtoul(buf, 0, &mem_size))
+ return -EINVAL;
+
+ mutex_lock(&dom0_dev.data_lock);
+ if (0 == mem_size) {
+ err = -EINVAL;
+ goto fail;
+ } else if (mem_size > (rsv_memsize - dom0_dev.used_memsize)) {
+ XEN_ERR("configure memory size fail\n");
+ err = -EINVAL;
+ goto fail;
+ } else
+ dom0_dev.config_memsize = mem_size;
+
+fail:
+ mutex_unlock(&dom0_dev.data_lock);
+ return err ? err : count;
+}
+
+static DEVICE_ATTR(memsize, S_IRUGO | S_IWUSR, show_memsize, store_memsize);
+static DEVICE_ATTR(memsize_rsvd, S_IRUGO, show_memsize_rsvd, NULL);
+
+static struct attribute *dev_attrs[] = {
+ &dev_attr_memsize.attr,
+ &dev_attr_memsize_rsvd.attr,
+ NULL,
+};
+
+/* the memory size unit is MB */
+static const struct attribute_group dev_attr_grp = {
+ .name = "memsize-mB",
+ .attrs = dev_attrs,
+};
+
+
+static void
+sort_viraddr(struct memblock_info *mb, int cnt)
+{
+ int i,j;
+ uint64_t tmp_pfn;
+ uint64_t tmp_viraddr;
+
+ /*sort virtual address and pfn */
+ for(i = 0; i < cnt; i ++) {
+ for(j = cnt - 1; j > i; j--) {
+ if(mb[j].pfn < mb[j - 1].pfn) {
+ tmp_pfn = mb[j - 1].pfn;
+ mb[j - 1].pfn = mb[j].pfn;
+ mb[j].pfn = tmp_pfn;
+
+ tmp_viraddr = mb[j - 1].vir_addr;
+ mb[j - 1].vir_addr = mb[j].vir_addr;
+ mb[j].vir_addr = tmp_viraddr;
+ }
+ }
+ }
+}
+
+static int
+dom0_find_memdata(const char * mem_name)
+{
+ unsigned i;
+ int idx = -1;
+ for(i = 0; i< NUM_MEM_CTX; i++) {
+ if(dom0_dev.mm_data[i] == NULL)
+ continue;
+ if (!strncmp(dom0_dev.mm_data[i]->name, mem_name,
+ sizeof(char) * DOM0_NAME_MAX)) {
+ idx = i;
+ break;
+ }
+ }
+
+ return idx;
+}
+
+static int
+dom0_find_mempos(void)
+{
+ unsigned i;
+ int idx = -1;
+
+ for(i = 0; i< NUM_MEM_CTX; i++) {
+ if(dom0_dev.mm_data[i] == NULL){
+ idx = i;
+ break;
+ }
+ }
+
+ return idx;
+}
+
+static int
+dom0_memory_release(struct dom0_mm_data *mm_data)
+{
+ int idx;
+ uint32_t num_block, block_id;
+
+ /* each memory block is 2M */
+ num_block = mm_data->mem_size / SIZE_PER_BLOCK;
+ if (num_block == 0)
+ return -EINVAL;
+
+ /* reset global memory data */
+ idx = dom0_find_memdata(mm_data->name);
+ if (idx >= 0) {
+ dom0_dev.used_memsize -= mm_data->mem_size;
+ dom0_dev.mm_data[idx] = NULL;
+ dom0_dev.num_mem_ctx--;
+ }
+
+ /* reset these memory blocks status as free */
+ for (idx = 0; idx < num_block; idx++) {
+ block_id = mm_data->block_num[idx];
+ rsv_mm_info[block_id].used = 0;
+ }
+
+ memset(mm_data, 0, sizeof(struct dom0_mm_data));
+ vfree(mm_data);
+ return 0;
+}
+
+static int
+dom0_memory_free(uint32_t rsv_size)
+{
+ uint64_t vstart, vaddr;
+ uint32_t i, num_block, size;
+
+ if (!xen_pv_domain())
+ return -1;
+
+ /* each memory block is 2M */
+ num_block = rsv_size / SIZE_PER_BLOCK;
+ if (num_block == 0)
+ return -EINVAL;
+
+ /* free all memory blocks of size of 4M and destroy contiguous region */
+ for (i = 0; i < dom0_dev.num_bigblock * 2; i += 2) {
+ vstart = rsv_mm_info[i].vir_addr;
+ if (vstart) {
+ #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 13, 0)
+ if (rsv_mm_info[i].exchange_flag)
+ xen_destroy_contiguous_region(vstart,
+ DOM0_CONTIG_NUM_ORDER);
+ if (rsv_mm_info[i + 1].exchange_flag)
+ xen_destroy_contiguous_region(vstart +
+ DOM0_MEMBLOCK_SIZE,
+ DOM0_CONTIG_NUM_ORDER);
+ #else
+ if (rsv_mm_info[i].exchange_flag)
+ xen_destroy_contiguous_region(rsv_mm_info[i].pfn
+ * PAGE_SIZE,
+ DOM0_CONTIG_NUM_ORDER);
+ if (rsv_mm_info[i + 1].exchange_flag)
+ xen_destroy_contiguous_region(rsv_mm_info[i].pfn
+ * PAGE_SIZE + DOM0_MEMBLOCK_SIZE,
+ DOM0_CONTIG_NUM_ORDER);
+ #endif
+
+ size = DOM0_MEMBLOCK_SIZE * 2;
+ vaddr = vstart;
+ while (size > 0) {
+ ClearPageReserved(virt_to_page(vaddr));
+ vaddr += PAGE_SIZE;
+ size -= PAGE_SIZE;
+ }
+ free_pages(vstart, MAX_NUM_ORDER);
+ }
+ }
+
+ /* free all memory blocks size of 2M and destroy contiguous region */
+ for (; i < num_block; i++) {
+ vstart = rsv_mm_info[i].vir_addr;
+ if (vstart) {
+ if (rsv_mm_info[i].exchange_flag)
+ xen_destroy_contiguous_region(vstart,
+ DOM0_CONTIG_NUM_ORDER);
+
+ size = DOM0_MEMBLOCK_SIZE;
+ vaddr = vstart;
+ while (size > 0) {
+ ClearPageReserved(virt_to_page(vaddr));
+ vaddr += PAGE_SIZE;
+ size -= PAGE_SIZE;
+ }
+ free_pages(vstart, DOM0_CONTIG_NUM_ORDER);
+ }
+ }
+
+ memset(rsv_mm_info, 0, sizeof(struct memblock_info) * num_block);
+ vfree(rsv_mm_info);
+ rsv_mm_info = NULL;
+
+ return 0;
+}
+
+static void
+find_free_memory(uint32_t count, struct dom0_mm_data *mm_data)
+{
+ uint32_t i = 0;
+ uint32_t j = 0;
+
+ while ((i < count) && (j < rsv_memsize / SIZE_PER_BLOCK)) {
+ if (rsv_mm_info[j].used == 0) {
+ mm_data->block_info[i].pfn = rsv_mm_info[j].pfn;
+ mm_data->block_info[i].vir_addr =
+ rsv_mm_info[j].vir_addr;
+ mm_data->block_info[i].mfn = rsv_mm_info[j].mfn;
+ mm_data->block_info[i].exchange_flag =
+ rsv_mm_info[j].exchange_flag;
+ mm_data->block_num[i] = j;
+ rsv_mm_info[j].used = 1;
+ i++;
+ }
+ j++;
+ }
+}
+
+/**
+ * Find all memory segments in which physical addresses are contiguous.
+ */
+static void
+find_memseg(int count, struct dom0_mm_data * mm_data)
+{
+ int i = 0;
+ int j, k, idx = 0;
+ uint64_t zone_len, pfn, num_block;
+
+ while(i < count) {
+ if (mm_data->block_info[i].exchange_flag == 0) {
+ i++;
+ continue;
+ }
+ k = 0;
+ pfn = mm_data->block_info[i].pfn;
+ mm_data->seg_info[idx].pfn = pfn;
+ mm_data->seg_info[idx].mfn[k] = mm_data->block_info[i].mfn;
+
+ for (j = i + 1; j < count; j++) {
+
+ /* ignore exchange fail memory block */
+ if (mm_data->block_info[j].exchange_flag == 0)
+ break;
+
+ if (mm_data->block_info[j].pfn !=
+ (mm_data->block_info[j - 1].pfn +
+ DOM0_MEMBLOCK_SIZE / PAGE_SIZE))
+ break;
+ ++k;
+ mm_data->seg_info[idx].mfn[k] = mm_data->block_info[j].mfn;
+ }
+
+ num_block = j - i;
+ zone_len = num_block * DOM0_MEMBLOCK_SIZE;
+ mm_data->seg_info[idx].size = zone_len;
+
+ XEN_PRINT("memseg id=%d, size=0x%llx\n", idx, zone_len);
+ i = i+ num_block;
+ idx++;
+ if (idx == DOM0_NUM_MEMSEG)
+ break;
+ }
+ mm_data->num_memseg = idx;
+}
+
+static int
+dom0_memory_reserve(uint32_t rsv_size)
+{
+ uint64_t pfn, vstart, vaddr;
+ uint32_t i, num_block, size, allocated_size = 0;
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)
+ dma_addr_t dma_handle;
+#endif
+
+ /* 2M as memory block */
+ num_block = rsv_size / SIZE_PER_BLOCK;
+
+ rsv_mm_info = vmalloc(sizeof(struct memblock_info) * num_block);
+ if (!rsv_mm_info) {
+ XEN_ERR("Unable to allocate device memory information\n");
+ return -ENOMEM;
+ }
+ memset(rsv_mm_info, 0, sizeof(struct memblock_info) * num_block);
+
+ /* try alloc size of 4M once */
+ for (i = 0; i < num_block; i += 2) {
+ vstart = (unsigned long)
+ __get_free_pages(GFP_ATOMIC, MAX_NUM_ORDER);
+ if (vstart == 0)
+ break;
+
+ dom0_dev.num_bigblock = i / 2 + 1;
+ allocated_size = SIZE_PER_BLOCK * (i + 2);
+
+ /* size of 4M */
+ size = DOM0_MEMBLOCK_SIZE * 2;
+
+ vaddr = vstart;
+ while (size > 0) {
+ SetPageReserved(virt_to_page(vaddr));
+ vaddr += PAGE_SIZE;
+ size -= PAGE_SIZE;
+ }
+
+ pfn = virt_to_pfn(vstart);
+ rsv_mm_info[i].pfn = pfn;
+ rsv_mm_info[i].vir_addr = vstart;
+ rsv_mm_info[i + 1].pfn =
+ pfn + DOM0_MEMBLOCK_SIZE / PAGE_SIZE;
+ rsv_mm_info[i + 1].vir_addr =
+ vstart + DOM0_MEMBLOCK_SIZE;
+ }
+
+ /*if it failed to alloc 4M, and continue to alloc 2M once */
+ for (; i < num_block; i++) {
+ vstart = (unsigned long)
+ __get_free_pages(GFP_ATOMIC, DOM0_CONTIG_NUM_ORDER);
+ if (vstart == 0) {
+ XEN_ERR("allocate memory fail.\n");
+ dom0_memory_free(allocated_size);
+ return -ENOMEM;
+ }
+
+ allocated_size += SIZE_PER_BLOCK;
+
+ size = DOM0_MEMBLOCK_SIZE;
+ vaddr = vstart;
+ while (size > 0) {
+ SetPageReserved(virt_to_page(vaddr));
+ vaddr += PAGE_SIZE;
+ size -= PAGE_SIZE;
+ }
+ pfn = virt_to_pfn(vstart);
+ rsv_mm_info[i].pfn = pfn;
+ rsv_mm_info[i].vir_addr = vstart;
+ }
+
+ sort_viraddr(rsv_mm_info, num_block);
+
+ for (i = 0; i< num_block; i++) {
+
+ /*
+ * This API is used to exchage MFN for getting a block of
+ * contiguous physical addresses, its maximum size is 2M.
+ */
+ #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 13, 0)
+ if (xen_create_contiguous_region(rsv_mm_info[i].vir_addr,
+ DOM0_CONTIG_NUM_ORDER, 0) == 0) {
+ #else
+ if (xen_create_contiguous_region(rsv_mm_info[i].pfn * PAGE_SIZE,
+ DOM0_CONTIG_NUM_ORDER, 0, &dma_handle) == 0) {
+ #endif
+ rsv_mm_info[i].exchange_flag = 1;
+ rsv_mm_info[i].mfn =
+ pfn_to_mfn(rsv_mm_info[i].pfn);
+ rsv_mm_info[i].used = 0;
+ } else {
+ XEN_ERR("exchange memeory fail\n");
+ rsv_mm_info[i].exchange_flag = 0;
+ dom0_dev.fail_times++;
+ if (dom0_dev.fail_times > MAX_EXCHANGE_FAIL_TIME) {
+ dom0_memory_free(rsv_size);
+ return -EFAULT;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int
+dom0_prepare_memsegs(struct memory_info *meminfo, struct dom0_mm_data *mm_data)
+{
+ uint32_t num_block;
+ int idx;
+
+ /* check if there is a free name buffer */
+ memcpy(mm_data->name, meminfo->name, DOM0_NAME_MAX);
+ mm_data->name[DOM0_NAME_MAX - 1] = '\0';
+ idx = dom0_find_mempos();
+ if (idx < 0)
+ return -1;
+
+ num_block = meminfo->size / SIZE_PER_BLOCK;
+ /* find free memory and new memory segments*/
+ find_free_memory(num_block, mm_data);
+ find_memseg(num_block, mm_data);
+
+ /* update private memory data */
+ mm_data->refcnt++;
+ mm_data->mem_size = meminfo->size;
+
+ /* update global memory data */
+ dom0_dev.mm_data[idx] = mm_data;
+ dom0_dev.num_mem_ctx++;
+ dom0_dev.used_memsize += mm_data->mem_size;
+
+ return 0;
+}
+
+static int
+dom0_check_memory (struct memory_info *meminfo)
+{
+ int idx;
+ uint64_t mem_size;
+
+ /* round memory size to the next even number. */
+ if (meminfo->size % 2)
+ ++meminfo->size;
+
+ mem_size = meminfo->size;
+ if (dom0_dev.num_mem_ctx > NUM_MEM_CTX) {
+ XEN_ERR("Memory data space is full in Dom0 driver\n");
+ return -1;
+ }
+ idx = dom0_find_memdata(meminfo->name);
+ if (idx >= 0) {
+ XEN_ERR("Memory data name %s has already exsited in Dom0 driver.\n",
+ meminfo->name);
+ return -1;
+ }
+ if ((dom0_dev.used_memsize + mem_size) > rsv_memsize) {
+ XEN_ERR("Total size can't be larger than reserved size.\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int __init
+dom0_init(void)
+{
+ if (!xen_domain())
+ return -ENODEV;
+
+ if (rsv_memsize > DOM0_CONFIG_MEMSIZE) {
+ XEN_ERR("The reserved memory size cannot be greater than %d\n",
+ DOM0_CONFIG_MEMSIZE);
+ return -EINVAL;
+ }
+
+ /* Setup the misc device */
+ dom0_dev.miscdev.minor = MISC_DYNAMIC_MINOR;
+ dom0_dev.miscdev.name = "dom0_mm";
+ dom0_dev.miscdev.fops = &data_fops;
+
+ /* register misc char device */
+ if (misc_register(&dom0_dev.miscdev) != 0) {
+ XEN_ERR("Misc device registration failed\n");
+ return -EPERM;
+ }
+
+ mutex_init(&dom0_dev.data_lock);
+ dom0_kobj = kobject_create_and_add("dom0-mm", mm_kobj);
+
+ if (!dom0_kobj) {
+ XEN_ERR("dom0-mm object creation failed\n");
+ misc_deregister(&dom0_dev.miscdev);
+ return -ENOMEM;
+ }
+
+ if (sysfs_create_group(dom0_kobj, &dev_attr_grp)) {
+ kobject_put(dom0_kobj);
+ misc_deregister(&dom0_dev.miscdev);
+ return -EPERM;
+ }
+
+ if (dom0_memory_reserve(rsv_memsize) < 0) {
+ sysfs_remove_group(dom0_kobj, &dev_attr_grp);
+ kobject_put(dom0_kobj);
+ misc_deregister(&dom0_dev.miscdev);
+ return -ENOMEM;
+ }
+
+ XEN_PRINT("####### DPDK Xen Dom0 module loaded #######\n");
+
+ return 0;
+}
+
+static void __exit
+dom0_exit(void)
+{
+ if (rsv_mm_info != NULL)
+ dom0_memory_free(rsv_memsize);
+
+ sysfs_remove_group(dom0_kobj, &dev_attr_grp);
+ kobject_put(dom0_kobj);
+ misc_deregister(&dom0_dev.miscdev);
+
+ XEN_PRINT("####### DPDK Xen Dom0 module unloaded #######\n");
+}
+
+static int
+dom0_open(struct inode *inode, struct file *file)
+{
+ file->private_data = NULL;
+
+ XEN_PRINT(KERN_INFO "/dev/dom0_mm opened\n");
+ return 0;
+}
+
+static int
+dom0_release(struct inode *inode, struct file *file)
+{
+ int ret = 0;
+ struct dom0_mm_data *mm_data = file->private_data;
+
+ if (mm_data == NULL)
+ return ret;
+
+ mutex_lock(&dom0_dev.data_lock);
+ if (--mm_data->refcnt == 0)
+ ret = dom0_memory_release(mm_data);
+ mutex_unlock(&dom0_dev.data_lock);
+
+ file->private_data = NULL;
+ XEN_PRINT(KERN_INFO "/dev/dom0_mm closed\n");
+ return ret;
+}
+
+static int
+dom0_mmap(struct file *file, struct vm_area_struct *vm)
+{
+ int status = 0;
+ uint32_t idx = vm->vm_pgoff;
+ uint64_t pfn, size = vm->vm_end - vm->vm_start;
+ struct dom0_mm_data *mm_data = file->private_data;
+
+ if(mm_data == NULL)
+ return -EINVAL;
+
+ mutex_lock(&dom0_dev.data_lock);
+ if (idx >= mm_data->num_memseg) {
+ mutex_unlock(&dom0_dev.data_lock);
+ return -EINVAL;
+ }
+
+ if (size > mm_data->seg_info[idx].size){
+ mutex_unlock(&dom0_dev.data_lock);
+ return -EINVAL;
+ }
+
+ XEN_PRINT("mmap memseg idx =%d,size = 0x%llx\n", idx, size);
+
+ pfn = mm_data->seg_info[idx].pfn;
+ mutex_unlock(&dom0_dev.data_lock);
+
+ status = remap_pfn_range(vm, vm->vm_start, pfn, size, PAGE_SHARED);
+
+ return status;
+}
+static int
+dom0_ioctl(struct file *file,
+ unsigned int ioctl_num,
+ unsigned long ioctl_param)
+{
+ int idx, ret;
+ char name[DOM0_NAME_MAX] = {0};
+ struct memory_info meminfo;
+ struct dom0_mm_data *mm_data = file->private_data;
+
+ XEN_PRINT("IOCTL num=0x%0x param=0x%0lx \n", ioctl_num, ioctl_param);
+
+ /**
+ * Switch according to the ioctl called
+ */
+ switch _IOC_NR(ioctl_num) {
+ case _IOC_NR(RTE_DOM0_IOCTL_PREPARE_MEMSEG):
+ ret = copy_from_user(&meminfo, (void *)ioctl_param,
+ sizeof(struct memory_info));
+ if (ret)
+ return -EFAULT;
+
+ if (mm_data != NULL) {
+ XEN_ERR("Cannot create memory segment for the same"
+ " file descriptor\n");
+ return -EINVAL;
+ }
+
+ /* Allocate private data */
+ mm_data = vmalloc(sizeof(struct dom0_mm_data));
+ if (!mm_data) {
+ XEN_ERR("Unable to allocate device private data\n");
+ return -ENOMEM;
+ }
+ memset(mm_data, 0, sizeof(struct dom0_mm_data));
+
+ mutex_lock(&dom0_dev.data_lock);
+ /* check if we can allocate memory*/
+ if (dom0_check_memory(&meminfo) < 0) {
+ mutex_unlock(&dom0_dev.data_lock);
+ vfree(mm_data);
+ return -EINVAL;
+ }
+
+ /* allocate memory and created memory segments*/
+ if (dom0_prepare_memsegs(&meminfo, mm_data) < 0) {
+ XEN_ERR("create memory segment fail.\n");
+ mutex_unlock(&dom0_dev.data_lock);
+ return -EIO;
+ }
+
+ file->private_data = mm_data;
+ mutex_unlock(&dom0_dev.data_lock);
+ break;
+
+ /* support multiple process in term of memory mapping*/
+ case _IOC_NR(RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG):
+ ret = copy_from_user(name, (void *)ioctl_param,
+ sizeof(char) * DOM0_NAME_MAX);
+ if (ret)
+ return -EFAULT;
+
+ mutex_lock(&dom0_dev.data_lock);
+ idx = dom0_find_memdata(name);
+ if (idx < 0) {
+ mutex_unlock(&dom0_dev.data_lock);
+ return -EINVAL;
+ }
+
+ mm_data = dom0_dev.mm_data[idx];
+ mm_data->refcnt++;
+ file->private_data = mm_data;
+ mutex_unlock(&dom0_dev.data_lock);
+ break;
+
+ case _IOC_NR(RTE_DOM0_IOCTL_GET_NUM_MEMSEG):
+ ret = copy_to_user((void *)ioctl_param, &mm_data->num_memseg,
+ sizeof(int));
+ if (ret)
+ return -EFAULT;
+ break;
+
+ case _IOC_NR(RTE_DOM0_IOCTL_GET_MEMSEG_INFO):
+ ret = copy_to_user((void *)ioctl_param,
+ &mm_data->seg_info[0],
+ sizeof(struct memseg_info) *
+ mm_data->num_memseg);
+ if (ret)
+ return -EFAULT;
+ break;
+ default:
+ XEN_PRINT("IOCTL default \n");
+ break;
+ }
+
+ return 0;
+}
+
+module_init(dom0_init);
+module_exit(dom0_exit);
+
+module_param(rsv_memsize, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(rsv_memsize, "Xen-dom0 reserved memory size(MB).\n");